##——— call libraries –

library("genefilter")
library("ggplot2")
library("grid");  library("reshape2")
library("ggrepel")
library("RColorBrewer")
library("DESeq2")
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
## 
##     windows
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following objects are masked from 'package:genefilter':
## 
##     rowSds, rowVars
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum
library("BiocParallel")
library("WGCNA")
## Loading required package: dynamicTreeCut
## Loading required package: fastcluster
## 
## Attaching package: 'fastcluster'
## The following object is masked from 'package:stats':
## 
##     hclust
## 
## 
## Attaching package: 'WGCNA'
## The following object is masked from 'package:IRanges':
## 
##     cor
## The following object is masked from 'package:S4Vectors':
## 
##     cor
## The following object is masked from 'package:stats':
## 
##     cor
library("Rtsne")
library("pheatmap")
library("tools")
library("viridis")
## Loading required package: viridisLite
library("scales")
## 
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
library("gridExtra")
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following object is masked from 'package:BiocGenerics':
## 
##     combine
library("GSVA")
library("GSEABase")
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: XML
## 
## Attaching package: 'XML'
## The following object is masked from 'package:tools':
## 
##     toHTML
## Loading required package: graph
## 
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
## 
##     addNode
library("stringr")
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
## 
##     boundary
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:GSEABase':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:graph':
## 
##     union
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:GenomicRanges':
## 
##     intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("rstatix")
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:IRanges':
## 
##     desc
## The following object is masked from 'package:genefilter':
## 
##     Anova
## The following object is masked from 'package:stats':
## 
##     filter
library("tidyr")
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
## The following object is masked from 'package:reshape2':
## 
##     smiths
source('D:/Pembro-Fluvac/Analysis/Ranalysis/PembroFluSpec_PlottingFunctions.R')
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
## 
##     space
## The following object is masked from 'package:S4Vectors':
## 
##     space
## The following object is masked from 'package:stats':
## 
##     lowess
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:rstatix':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## The following object is masked from 'package:genefilter':
## 
##     area
## Warning: package 'ggbeeswarm' was built under R version 4.0.3
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
##  [1] tools     parallel  stats4    grid      stats     graphics  grDevices
##  [8] utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggbeeswarm_0.6.0            e1071_1.7-3                
##  [3] MASS_7.3-51.6               gplots_3.0.4               
##  [5] tidyr_1.1.1                 rstatix_0.6.0              
##  [7] dplyr_1.0.2                 stringr_1.4.0              
##  [9] GSEABase_1.50.1             graph_1.66.0               
## [11] annotate_1.66.0             XML_3.99-0.5               
## [13] AnnotationDbi_1.50.3        GSVA_1.36.2                
## [15] gridExtra_2.3               scales_1.1.1               
## [17] viridis_0.5.1               viridisLite_0.3.0          
## [19] pheatmap_1.0.12             Rtsne_0.15                 
## [21] WGCNA_1.69                  fastcluster_1.1.25         
## [23] dynamicTreeCut_1.63-1       BiocParallel_1.22.0        
## [25] DESeq2_1.28.1               SummarizedExperiment_1.18.2
## [27] DelayedArray_0.14.1         matrixStats_0.56.0         
## [29] Biobase_2.48.0              GenomicRanges_1.40.0       
## [31] GenomeInfoDb_1.24.2         IRanges_2.22.2             
## [33] S4Vectors_0.26.1            BiocGenerics_0.34.0        
## [35] RColorBrewer_1.1-2          ggrepel_0.8.2              
## [37] reshape2_1.4.4              ggplot2_3.3.2              
## [39] genefilter_1.70.0          
## 
## loaded via a namespace (and not attached):
##  [1] readxl_1.3.1           backports_1.1.9        Hmisc_4.4-1           
##  [4] plyr_1.8.6             splines_4.0.2          digest_0.6.25         
##  [7] foreach_1.5.0          htmltools_0.5.0        GO.db_3.11.4          
## [10] gdata_2.18.0           magrittr_1.5           checkmate_2.0.0       
## [13] memoise_1.1.0          cluster_2.1.0          doParallel_1.0.15     
## [16] openxlsx_4.1.5         jpeg_0.1-8.1           colorspace_1.4-1      
## [19] blob_1.2.1             haven_2.3.1            xfun_0.15             
## [22] crayon_1.3.4           RCurl_1.98-1.2         impute_1.62.0         
## [25] survival_3.2-11        iterators_1.0.12       glue_1.4.1            
## [28] gtable_0.3.0           zlibbioc_1.34.0        XVector_0.28.0        
## [31] car_3.0-9              abind_1.4-5            DBI_1.1.0             
## [34] Rcpp_1.0.8.3           xtable_1.8-4           htmlTable_2.0.1       
## [37] foreign_0.8-80         bit_4.0.4              preprocessCore_1.50.0 
## [40] Formula_1.2-3          htmlwidgets_1.5.3      ellipsis_0.3.1        
## [43] pkgconfig_2.0.3        nnet_7.3-14            locfit_1.5-9.4        
## [46] tidyselect_1.1.0       rlang_0.4.7            later_1.1.0.1         
## [49] munsell_0.5.0          cellranger_1.1.0       generics_0.0.2        
## [52] RSQLite_2.2.0          broom_0.7.0            evaluate_0.14         
## [55] fastmap_1.0.1          yaml_2.2.1             knitr_1.29            
## [58] bit64_4.0.2            zip_2.1.0              caTools_1.18.0        
## [61] purrr_0.3.4            mime_0.9               compiler_4.0.2        
## [64] shinythemes_1.1.2      rstudioapi_0.11        beeswarm_0.2.3        
## [67] curl_4.3               png_0.1-7              tibble_3.0.3          
## [70] geneplotter_1.66.0     stringi_1.4.6          highr_0.8             
## [73] forcats_0.5.0          lattice_0.20-41        Matrix_1.4-0          
## [76] vctrs_0.3.2            pillar_1.4.6           lifecycle_0.2.0       
## [79] data.table_1.13.0      bitops_1.0-6           httpuv_1.5.4          
## [82] R6_2.4.1               latticeExtra_0.6-29    promises_1.1.1        
## [85] KernSmooth_2.23-17     rio_0.5.16             vipor_0.4.5           
## [88] codetools_0.2-16       gtools_3.8.2           withr_2.2.0           
## [91] GenomeInfoDbData_1.2.3 hms_0.5.3              rpart_4.1-15          
## [94] class_7.3-17           rmarkdown_2.7          carData_3.0-4         
## [97] shiny_1.5.0            base64enc_0.1-3

———– Save mergedData as RDS ——————–

mergedData <- readRDS("D:/Pembro-Fluvac/Analysis/mergedData/mergedData.RDS")
demog <- readRDS("D:/Pembro-Fluvac/Analysis/mergedData/demog.RDS")
serology <- readRDS("D:/Pembro-Fluvac/Analysis/mergedData/serology.RDS")

———– Cohort 2 description ——————–

table(demog$Current.Immunotherapy[which(demog$Current.Immunotherapy != "Ipi/Nivo")])
## 
##                   Nivolumab Pembrolizumab 
##            27             8            22
x <- demog[which(demog$Cohort == "anti-PD-1"),] 
table(x$Sex)
## 
## Female   Male 
##     11     21
x <- demog[which(demog$Cohort == "Healthy"),] 
table(x$Sex)
## 
## Female   Male 
##     12     15
table(mergedData$Cohort, mergedData$Sex, mergedData$Year)  
## , ,  = 1
## 
##            
##             Female Male
##   Healthy        0    0
##   anti-PD-1      0    3
##   nonPD1         1    0
## 
## , ,  = 2
## 
##            
##             Female Male
##   Healthy        6    6
##   anti-PD-1      5    6
##   nonPD1         1    0
## 
## , ,  = 3
## 
##            
##             Female Male
##   Healthy        6    9
##   anti-PD-1      4   12
##   nonPD1         0    0
table(serology$Cohort, serology$TimeCategory, serology$Year, !is.na(serology$H1N1pdm09.HAI.titer))
## , ,  = 1,  = FALSE
## 
##          
##           baseline late oneWeek
##   aPD1           0    0       0
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 2,  = FALSE
## 
##          
##           baseline late oneWeek
##   aPD1           0    1       1
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 3,  = FALSE
## 
##          
##           baseline late oneWeek
##   aPD1           0    0       0
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 1,  = TRUE
## 
##          
##           baseline late oneWeek
##   aPD1           4    4       3
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 2,  = TRUE
## 
##          
##           baseline late oneWeek
##   aPD1          12    8       2
##   Healthy       12   12      12
##   nonPD1         3    3       1
## 
## , ,  = 3,  = TRUE
## 
##          
##           baseline late oneWeek
##   aPD1          16   13      16
##   Healthy       15   15      15
##   nonPD1         1    1       1
subsetData <- subset(demog, Current.Immunotherapy %in% c("Pembrolizumab","Nivolumab"))
table(subsetData$Current.Immunotherapy)
## 
##     Nivolumab Pembrolizumab 
##             8            22
table(subsetData$Cohort, subsetData$Sex)
##            
##             Female Male
##   anti-PD-1      9   21
table(demog$Cohort, demog$Sex)
##            
##             Female Male
##   anti-PD-1     11   21
##   Healthy       12   15
subsetData <- subset(mergedData, Cohort == "anti-PD-1" & TimeCategory == "baseline")
ggplot( data = subsetData, aes(x=Cycle.of.Immunotherapy)) + geom_histogram(color="white",fill="#FFB18C") + 
  ggtitle("Cohort 2 - anti-PD1 cohort") +  theme_bw() + 
  theme(axis.text=element_text(color="black",size=18), axis.title=element_text(color="black",size=24), plot.title = element_text(size=24)) + scale_x_continuous(breaks=seq(0,30,5)) +
  theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank())
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CyclesImmunotherapy.pdf", device = 'pdf', height=3, width=8)
# write.csv(x = subsetData[,c("dbCode", "Cohort", "TimeCategory", "Cycle.of.Immunotherapy")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1b-2.csv")

median(mergedData[which(mergedData$Cohort == "anti-PD-1" & mergedData$TimeCategory == "baseline"), 'Cycle.of.Immunotherapy'], na.rm = T)  
## [1] 7
sd(mergedData[which(mergedData$Cohort == "anti-PD-1" & mergedData$TimeCategory == "baseline"), 'Cycle.of.Immunotherapy'], na.rm = T)  
## [1] 6.863131
paste("Median Age anti-PD-1: ", median(mergedData[which(mergedData$Cohort == "anti-PD-1" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)  )
## [1] "Median Age anti-PD-1:  61.5"
paste("Median Age Healthy: ", median(mergedData[which(mergedData$Cohort == "Healthy" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)   )
## [1] "Median Age Healthy:  33"
paste("Range Age anti-PD-1: ", range(mergedData[which(mergedData$Cohort == "anti-PD-1" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)  )
## [1] "Range Age anti-PD-1:  37" "Range Age anti-PD-1:  81"
paste("Range Age Healthy: ", range(mergedData[which(mergedData$Cohort == "Healthy" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)   )
## [1] "Range Age Healthy:  23" "Range Age Healthy:  47"

## ———– Tfh dynamics ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="cTfh +/+ at baseline", yLabel="ICOS+CD38+ cTfh frequency at d0")
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Warning: Removed 8 rows containing missing values (position_quasirandom).

## excluding PD-1 in the definition of cTfh
subsetData <- subset(mergedData, TimeCategory == "oneWeek" & Cohort != "nonPD1" & cTfh_ICOShiCD38hi_..FreqParent > 0)
twoSampleBar(data=subsetData, xData="Cohort", yData="FCtfh_oW", fillParam="Cohort", title="ICOS+CD38+\ncTfh", yLabel="Fold-change at one week", FCplot=T, ttest = F) + 
  scale_y_continuous(breaks=seq(0,99,1)) + theme(axis.title.y = element_text(vjust=1.9)) + 
  ggtitle(expression(paste(paste("ICO",S^'+', "CD3",8^'+' ), " cTfh")))
## [1] "path 3:  ttest == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResponse_foldchange_AllYrs.pdf", device="pdf", width=3.8, height=7)
subsetData %>% group_by(Cohort) %>% shapiro_test(FCtfh_oW)
## # A tibble: 2 x 4
##   Cohort    variable statistic        p
##   <fct>     <chr>        <dbl>    <dbl>
## 1 Healthy   FCtfh_oW     0.987 0.973   
## 2 anti-PD-1 FCtfh_oW     0.746 0.000148
wilcox_test(subsetData, FCtfh_oW ~ Cohort)
## # A tibble: 1 x 7
##   .y.      group1  group2       n1    n2 statistic      p
## * <chr>    <chr>   <chr>     <int> <int>     <dbl>  <dbl>
## 1 FCtfh_oW Healthy anti-PD-1    27    20       180 0.0535
# write.csv(x = subsetData[,c("dbCode", "Cohort", "TimeCategory", "FCtfh_oW")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/1d.csv")



## including PD-1 in the definition of cTfh
subsetData <- subset(mergedData, TimeCategory == "oneWeek" & Cohort != "nonPD1" & cTfh_ICOShiCD38hi_..FreqParent > 0)
twoSampleBar(data=subsetData, xData="Cohort", yData="X5PD1_ICOSCD38_oW", fillParam="Cohort", title="ICOS+CD38+\nCXCR5+PD1+", yLabel="Fold-change at one week", FCplot=T, ttest = F) + 
  scale_y_continuous(breaks=seq(0,99,1)) + theme(axis.title.y = element_text(vjust=1.9)) + 
  ggtitle(expression(paste(paste("PD-",1^'+',"ICO",S^'+', "CD3",8^'+' ), " cTfh")))
## [1] "path 3:  ttest == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Warning: Removed 16 rows containing missing values (position_quasirandom).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResponse_X5PD1ICOSCD38_foldchange_AllYrs.pdf", device="pdf", width=3.5)
subsetData %>% group_by(Cohort) %>% shapiro_test(X5PD1_ICOSCD38_oW)
## # A tibble: 2 x 4
##   Cohort    variable          statistic        p
##   <fct>     <chr>                 <dbl>    <dbl>
## 1 Healthy   X5PD1_ICOSCD38_oW     0.978 0.958   
## 2 anti-PD-1 X5PD1_ICOSCD38_oW     0.730 0.000368
wilcox_test(subsetData, X5PD1_ICOSCD38_oW ~ Cohort)
## # A tibble: 1 x 7
##   .y.               group1  group2       n1    n2 statistic      p
## * <chr>             <chr>   <chr>     <int> <int>     <dbl>  <dbl>
## 1 X5PD1_ICOSCD38_oW Healthy anti-PD-1    27    20        56 0.0106
# write.csv(x = subsetData[,c("dbCode", "Cohort", "TimeCategory", "X5PD1_ICOSCD38_oW")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1k.csv")

subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(data = subsetData, xData = "TimeCategory", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "Cohort", title = "cTfh response", xLabel = "TimeCategory",
            yLabel = "ICOS+CD38+ (% cTfh)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,150,2), limits=c(0,18))     + 
  ylab(expression(paste("ICO", S^'+', "CD3", 8^'+', " (% cTfh)")))
## Warning: Removed 31 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 31 row(s) containing missing values (geom_path).
## Warning: Removed 31 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResp_overTime_PerSubject.pdf")
# write.csv(x = subsetData[,c("dbCode", "Cohort", "TimeCategory", "cTfh_ICOShiCD38hi_..FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1f.csv")

melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_..FreqParent"))
melted <- melted[which(melted$TimeCategory != 'late'),]
prePostTimeAveraged(data = melted, title = "cTfh", xLabel = NULL, yLabel = "ICOS+CD38+ (% cTfh)") + scale_y_continuous(breaks=seq(1,10,0.5), limits=c(3.0, 7))+ 
  ylab(expression(paste("ICO", S^'+', "CD3", 8^'+', " (% cTfh)")))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResp_overTime_freqcTfh.pdf", width=4)     # unpaired analysis
summary( fit <- lm(formula = value ~ Cohort * TimeCategory, data = melted) );  tukey_hsd(fit)       # testing using unpaired two-way ANOVA with Tukey's posthoc
## 
## Call:
## lm(formula = value ~ Cohort * TimeCategory, data = melted)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.1895 -1.2726 -0.4255  0.8745  9.3105 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          3.95000    0.43530   9.074 1.86e-14 ***
## Cohortanti-PD-1                     -0.04455    0.64965  -0.069   0.9455    
## TimeCategoryoneWeek                  0.61259    0.61561   0.995   0.3223    
## Cohortanti-PD-1:TimeCategoryoneWeek  1.67148    0.92475   1.807   0.0739 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.262 on 93 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.1363, Adjusted R-squared:  0.1085 
## F-statistic: 4.894 on 3 and 93 DF,  p-value: 0.003337
## # A tibble: 8 x 9
##   term  group1 group2 null.value estimate conf.low conf.high   p.adj
## * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
## 1 Coho~ Healt~ anti-~          0   0.765   -0.153       1.68 0.102  
## 2 Time~ basel~ oneWe~          0   1.35     0.441       2.27 0.00407
## 3 Coho~ Healt~ anti-~          0  -0.0445  -1.74        1.65 1      
## 4 Coho~ Healt~ Healt~          0   0.613   -0.998       2.22 0.753  
## 5 Coho~ Healt~ anti-~          0   2.24     0.518       3.96 0.00535
## 6 Coho~ anti-~ Healt~          0   0.657   -1.04        2.36 0.743  
## 7 Coho~ anti-~ anti-~          0   2.28     0.479       4.09 0.00715
## 8 Coho~ Healt~ anti-~          0   1.63    -0.0948      3.35 0.0711 
## # ... with 1 more variable: p.adj.signif <chr>
melted %>% group_by(Cohort) %>% kruskal_test(formula = value ~ TimeCategory)                        # testing per cohort using Kruskall-Wallis without adjustment
## # A tibble: 2 x 7
##   Cohort    .y.       n statistic    df       p method        
## * <fct>     <chr> <int>     <dbl> <int>   <dbl> <chr>         
## 1 Healthy   value    54      2.01     1 0.156   Kruskal-Wallis
## 2 anti-PD-1 value    51      8.08     1 0.00447 Kruskal-Wallis
index <- melted[melted$TimeCategory=="oneWeek", 'dbCode']
melted.paired <- melted[which(melted$dbCode %in% index), ]
prePostTimeAveraged(data = melted.paired, title = "cTfh", xLabel = NULL, yLabel = "ICOS+CD38+ (% cTfh)") + scale_y_continuous(breaks=seq(1,10,0.5), limits=c(3.0, 7))+ 
  ylab(expression(paste("ICO", S^'+', "CD3", 8^'+', " (% cTfh)")))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResp_overTime_freqcTfh_paired.pdf", width=4)
# write.csv(melted.paired, file = "cTfh_responses_freqParent.csv")                                  # paired two-way ANOVA results calculated in Prism
# write.csv(x = melted.paired, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1g.csv")


#************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_..FreqParent <- subsetData$cTfh_ICOShiCD38hi_..FreqParent * subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /10000
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_..FreqParent"))
melted <- melted[which(melted$TimeCategory != 'late'),]
prePostTimeAveraged(melted, title = "cTfh", xLabel = NULL, yLabel = "ICOS+CD38+ cTfh (% CD4)") + scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0.25,1))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResp_overTime_freqCD4.pdf", width=4)
summary( fit <- lm(formula = value ~ Cohort * TimeCategory, data = melted) ); tukey_hsd(fit)        # testing using unpaired two-way ANOVA with Tukey's posthoc
## 
## Call:
## lm(formula = value ~ Cohort * TimeCategory, data = melted)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69796 -0.19621 -0.03971  0.11118  1.78026 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          0.36022    0.06832   5.272 8.72e-07 ***
## Cohortanti-PD-1                      0.06935    0.10197   0.680    0.498    
## TimeCategoryoneWeek                  0.08326    0.09663   0.862    0.391    
## Cohortanti-PD-1:TimeCategoryoneWeek  0.18513    0.14515   1.275    0.205    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.355 on 93 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.1115, Adjusted R-squared:  0.08285 
## F-statistic: 3.891 on 3 and 93 DF,  p-value: 0.01144
## # A tibble: 8 x 9
##   term  group1 group2 null.value estimate conf.low conf.high   p.adj
## * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
## 1 Coho~ Healt~ anti-~          0   0.159    0.0147     0.303 0.0311 
## 2 Time~ basel~ oneWe~          0   0.165    0.0221     0.308 0.0241 
## 3 Coho~ Healt~ anti-~          0   0.0694  -0.197      0.336 0.904  
## 4 Coho~ Healt~ Healt~          0   0.0833  -0.170      0.336 0.825  
## 5 Coho~ Healt~ anti-~          0   0.338    0.0675     0.608 0.00809
## 6 Coho~ anti-~ Healt~          0   0.0139  -0.253      0.281 0.999  
## 7 Coho~ anti-~ anti-~          0   0.268   -0.0150     0.552 0.0701 
## 8 Coho~ Healt~ anti-~          0   0.254   -0.0158     0.525 0.0725 
## # ... with 1 more variable: p.adj.signif <chr>
melted %>% group_by(Cohort) %>% kruskal_test(formula = value ~ TimeCategory)                        # testing per cohort using Kruskal-Wallis without adjustment
## # A tibble: 2 x 7
##   Cohort    .y.       n statistic    df     p method        
## * <fct>     <chr> <int>     <dbl> <int> <dbl> <chr>         
## 1 Healthy   value    54      1.89     1 0.169 Kruskal-Wallis
## 2 anti-PD-1 value    51      2.57     1 0.109 Kruskal-Wallis
index <- melted[melted$TimeCategory=="oneWeek", 'dbCode']
melted.paired <- melted[which(melted$dbCode %in% index), ]
prePostTimeAveraged(data = melted.paired, title = "cTfh", xLabel = NULL, yLabel = "ICOS+CD38+ cTfh (% CD4+)") + scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0.25,1))+ 
  ylab(expression(paste("ICO", S^'+', "CD3", 8^'+', " (% CD", 4^'+', ")")))   # paired two-way ANOVA results calculated in Prism

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResp_overTime_freqCD4_paired.pdf", width=4)
# write.csv(melted.paired, file = "cTfh_responses_freqCD4.csv")
# write.csv(x = melted.paired, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1h.csv")


# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1")
subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_Ki67hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ Ki67hi frequencies averaged", xLabel = NULL, yLabel = "+/+ Ki67hi (freq CD4)") 

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort  + TimeCategory + Year))    # Year is a batch effect
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort + 
##     TimeCategory + Year, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43045 -0.09324 -0.02328  0.05492  1.12207 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.52594    0.08253   6.373 3.58e-09 ***
## Cohortanti-PD-1      0.10345    0.03567   2.900  0.00443 ** 
## TimeCategoryoneWeek  0.08411    0.03988   2.109  0.03701 *  
## TimeCategorylate    -0.04278    0.04667  -0.917  0.36121    
## Year                -0.15782    0.03045  -5.183 8.96e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1963 on 120 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.2374, Adjusted R-squared:  0.212 
## F-statistic:  9.34 on 4 and 120 DF,  p-value: 1.32e-06
tukey_hsd(aov(data = subsetData, formula = cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort:TimeCategory))  # proportion of CD4 not FreqParent
## # A tibble: 15 x 9
##    term  group1 group2 null.value estimate conf.low conf.high  p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>  <dbl>
##  1 Coho~ Healt~ anti-~          0  0.0584  -0.121      0.238  0.935 
##  2 Coho~ Healt~ Healt~          0  0.0419  -0.129      0.212  0.98  
##  3 Coho~ Healt~ anti-~          0  0.184    0.00155    0.366  0.0468
##  4 Coho~ Healt~ Healt~          0  0.00634 -0.200      0.213  1     
##  5 Coho~ Healt~ anti-~          0  0.00490 -0.201      0.211  1     
##  6 Coho~ anti-~ Healt~          0 -0.0165  -0.196      0.163  1     
##  7 Coho~ anti-~ anti-~          0  0.125   -0.0657     0.316  0.407 
##  8 Coho~ anti-~ Healt~          0 -0.0520  -0.266      0.162  0.981 
##  9 Coho~ anti-~ anti-~          0 -0.0535  -0.268      0.161  0.979 
## 10 Coho~ Healt~ anti-~          0  0.142   -0.0404     0.324  0.221 
## 11 Coho~ Healt~ Healt~          0 -0.0356  -0.242      0.171  0.996 
## 12 Coho~ Healt~ anti-~          0 -0.0370  -0.243      0.169  0.995 
## 13 Coho~ anti-~ Healt~          0 -0.177   -0.393      0.0387 0.172 
## 14 Coho~ anti-~ anti-~          0 -0.179   -0.395      0.0372 0.165 
## 15 Coho~ Healt~ anti-~          0 -0.00144 -0.238      0.235  1     
## # ... with 1 more variable: p.adj.signif <chr>
subsetData <- subset(subsetData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_Ki67hi...FreqParent", fillParam="Cohort", title="Ki67 in ICOS+CD38+ cTfh at oW", 
             yLabel="+/+ Ki67hi (% CD4) ") 
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

data1 = subset(subsetData, Cohort == "Healthy");  data2 = subset(subsetData, Cohort == "anti-PD-1")
bivScatter(data1 = data1, data2 = data2, name1 = "Healthy", name2 = "anti-PD-1",
           xData = "FCtfh_oW", yData = "cTfh_ICOShiCD38hi_Ki67hi...FreqParent", fillParam = "Cohort",
           title = expression(paste("Ki6",7^"+", " cTfh at one week")),
           xLabel = expression(paste("Fold-change in ICO", S^'+', "CD3", 8^'+'," cTfh")), 
           yLabel = expression(paste("Ki6",7^'+',"ICO", S^'+', "CD3", 8^'+', " cTfh (% CD4)")),
           nonparam = T) + scale_y_continuous(limits=c(0,3.5))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/Ki67-vs-cTfhFoldChange_biv.pdf")
temp <- rbind(data1, data2)
# write.csv(temp[,c("dbCode","TimeCategory","FCtfh_oW","cTfh_ICOShiCD38hi_Ki67hi...FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e5g.csv")


# ************** relationship to cycle of immunotherapy *********************
subsetData <- subset(mergedData,  cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
univScatter(data = subsetData, yData = "cTfh_ICOShiCD38hi_..FreqParent", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "Cycle of IT vs +/+ cTfh at bL", yLabel= "ICOS+CD38+ cTfh freq", xLabel = "Cycle of immunotherapy") + coord_cartesian(ylim= c(0,8)) 
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 104 rows containing non-finite values (stat_smooth).
## Warning: Removed 104 rows containing missing values (geom_point).

summary(fit <- lm(Cycle.of.Immunotherapy ~ cTfh_ICOShiCD38hi_..FreqParent, subsetData))
## 
## Call:
## lm(formula = Cycle.of.Immunotherapy ~ cTfh_ICOShiCD38hi_..FreqParent, 
##     data = subsetData)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.231 -5.899 -1.788  2.776 17.980 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      18.710      6.745   2.774   0.0136 *
## cTfh_ICOShiCD38hi_..FreqParent   -2.264      1.735  -1.305   0.2103  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.602 on 16 degrees of freedom
##   (104 observations deleted due to missingness)
## Multiple R-squared:  0.09623,    Adjusted R-squared:  0.03974 
## F-statistic: 1.704 on 1 and 16 DF,  p-value: 0.2103
univScatter(data = subsetData, yData = "FCtfh_oW", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "cTfh responses", yLabel= "Fold-change at one week", xLabel = "Cycle of immunotherapy", nonparam=T)  + scale_fill_manual(values=c("#FFB18C"))+ 
  ggtitle(expression(paste("ICO",S^'+',"CD3",8^'+'," cTfh")))
## Warning in cor.test.default(data[, xData], data[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 105 rows containing non-finite values (stat_smooth).
## Warning: Removed 105 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CycleIT_vs_cTfhFC.pdf", device='pdf')
# write.csv(x = subsetData[,c("dbCode","Cohort","Cycle.of.Immunotherapy","FCtfh_oW")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1l.csv")

———– BAA Year 3 analysis - healthy young and elderly adults ——————–

BAAyear3long <- read.csv(file="D:/Pembro-Fluvac/Analysis/mergedData/BAAyear3long.csv"); names(BAAyear3long)[6] <- 'TimePoint'
BAAyear3long$Cohort <- factor(BAAyear3long$Cohort, levels=c("Young","Elderly"))
temp <- BAAyear3long[!duplicated(BAAyear3long$Subject),c("Subject","Cohort")]
temp$dbCode <- paste("Participant",1:nrow(temp))
BAAyear3long <- merge(x=BAAyear3long, y=temp, all.x=T, by=c("Subject", "Cohort"))
prePostTime(data = BAAyear3long, xData = 'TimePoint', yData = 'ICOShiCD38hi_FreqParent', fillParam = 'Cohort', groupby = 'dbCode', title = "cTfh with Aging",
            xLabel = 'Age Group', yLabel = "ICOS+CD38+ (% cTfh)") + scale_y_continuous(breaks=seq(0,60,5),limits = c(0,55)) + 
  scale_fill_manual(values = c("#EABD81", "#7B4D9A"))  + ylab(expression(paste(paste("ICO",S^'+', "CD3",8^'+' ), " (% cTfh)")))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

tukey_hsd( aov(data = BAAyear3long, formula = `ICOShiCD38hi_FreqParent` ~ `Cohort`:`TimePoint`))
## # A tibble: 6 x 9
##   term  group1 group2 null.value estimate conf.low conf.high   p.adj
## * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
## 1 Coho~ Young~ Elder~          0    -2.48   -8.10      3.13  6.57e-1
## 2 Coho~ Young~ Young~          0     6.84    1.13     12.6   1.21e-2
## 3 Coho~ Young~ Elder~          0     2.12   -3.50      7.73  7.59e-1
## 4 Coho~ Elder~ Young~          0     9.32    3.71     14.9   1.92e-4
## 5 Coho~ Elder~ Elder~          0     4.60   -0.914    10.1   1.36e-1
## 6 Coho~ Young~ Elder~          0    -4.72  -10.3       0.892 1.31e-1
## # ... with 1 more variable: p.adj.signif <chr>
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/BAAyear3_FCtfh.pdf")
# write.csv(BAAyear3long[,c("dbCode","TimePoint","ICOShiCD38hi_FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1m.csv")

BAAyear3wide <- read.csv(file="D:/Pembro-Fluvac/Analysis/mergedData/BAAyear3wide.csv") 
BAAyear3wide$Cohort <- factor(BAAyear3wide$Cohort, levels=c("Young","Elderly"))

twoSampleBar(data = BAAyear3wide, xData = "Cohort", yData = "FCtfh", fillParam = "Cohort", title = "ICOS+CD38+ cTfh with aging", yLabel = "Fold-change at one week", FCplot = T) + 
  scale_y_continuous(breaks=seq(0,6,1), limits=c(0,6))+ theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank())
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

## ******************** Subsets based on CXCR3 and CCR6 ***********************************

subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_CXCR3hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_CXCR3hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CXCR3hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ CXCR3hi frequencies averaged", xLabel = NULL, yLabel = "+/+ CXCR3hi (freq CD4)") 

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CXCR3hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CXCR3hi...FreqParent ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34907 -0.09471 -0.02436  0.05345  1.10364 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.235557   0.077701   3.032 0.002981 ** 
## Cohortanti-PD-1      0.084661   0.033581   2.521 0.013010 *  
## Year                -0.059007   0.028669  -2.058 0.041735 *  
## TimeCategoryoneWeek  0.146861   0.037546   3.911 0.000153 ***
## TimeCategorylate    -0.004802   0.043940  -0.109 0.913163    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1848 on 120 degrees of freedom
##   (31 observations deleted due to missingness)
## Multiple R-squared:  0.1821, Adjusted R-squared:  0.1548 
## F-statistic:  6.68 on 4 and 120 DF,  p-value: 6.891e-05
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CXCR3lo_CCR6lo...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ X3lo R6lo", xLabel = NULL, yLabel = "X3loR6lo (%ICOS+CD38+ cTfh)") 

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory != "late")
prePostTime(subsetData, xData = "TimeCategory", yData = "cTfh_ICOShiCD38hi_CXCR3lo_CCR6lo...FreqParent", fillParam = "Cohort", title = "X3loR6lo", xLabel = "TimeCategory",
            yLabel = "CXCR3lo CCR6lo (% ICOS+CD38+ cTfh)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,10), limits=c(0,60))    +
  ylab(expression(paste( "CXCR", 3^"lo", "CCR", 6^"lo", " (% ICO", S^'+', "CD3", 8^"+", " cTfh)")))
## Warning: Removed 43 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 43 row(s) containing missing values (geom_path).
## Warning: Removed 43 rows containing missing values (geom_point).

# ggsave( filename = "D:/Pembro-Fluvac/Analysis/Images/ICOShiCD38hi_X3loR6lo_prepostTime.pdf", width=5, height=7)
# write.csv(x = subsetData[,c("dbCode", "Cohort","TimeCategory","cTfh_ICOShiCD38hi_CXCR3lo_CCR6lo...FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1o-1.csv")


temp <- reshape2::dcast(subset(subsetData, Cohort == "Healthy"), formula =  dbCode  ~  TimeCategory, value.var="cTfh_ICOShiCD38hi_CXCR3lo_CCR6lo...FreqParent"  )
wilcox.test(temp$oneWeek, temp$baseline, paired=T)
## 
##  Wilcoxon signed rank exact test
## 
## data:  temp$oneWeek and temp$baseline
## V = 18, p-value = 0.01508
## alternative hypothesis: true location shift is not equal to 0
temp <- reshape2::dcast(subset(subsetData, Cohort == "anti-PD-1"), formula =  dbCode  ~  TimeCategory, value.var="cTfh_ICOShiCD38hi_CXCR3lo_CCR6lo...FreqParent"  )
wilcox.test(temp$oneWeek, temp$baseline, paired=T)
## Warning in wilcox.test.default(temp$oneWeek, temp$baseline, paired = T): cannot
## compute exact p-value with ties
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  temp$oneWeek and temp$baseline
## V = 30, p-value = 0.05245
## alternative hypothesis: true location shift is not equal to 0
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CXCR3lo_CCR6hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ X3lo R6hi", xLabel = NULL, yLabel = "X3loR6hi (%ICOS+CD38+ cTfh)") 

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory != "late")
prePostTime(subsetData, xData = "TimeCategory", yData = "cTfh_ICOShiCD38hi_CXCR3lo_CCR6hi...FreqParent", fillParam = "Cohort", title = "X3loR6hi", xLabel = "TimeCategory",
            yLabel = "CXCR3lo CCR6hi (% ICOS+CD38+ cTfh)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,10), limits=c(0,50))     +
  ylab(expression(paste( "CXCR", 3^"lo", "CCR", 6^"hi", " (% ICO", S^'+', "CD3", 8^"+", " cTfh)")))
## Warning: Removed 43 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 43 row(s) containing missing values (geom_path).

## Warning: Removed 43 rows containing missing values (geom_point).

# ggsave( filename = "D:/Pembro-Fluvac/Analysis/Images/ICOShiCD38hi_X3loR6hi_prepostTime.pdf", width=5, height=7)
# write.csv(x = subsetData[,c("dbCode", "Cohort","TimeCategory","cTfh_ICOShiCD38hi_CXCR3lo_CCR6hi...FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1o-2.csv")

temp <- reshape2::dcast(subset(subsetData, Cohort == "Healthy"), formula =  dbCode  ~  TimeCategory, value.var="cTfh_ICOShiCD38hi_CXCR3lo_CCR6hi...FreqParent"  )
wilcox.test(temp$oneWeek, temp$baseline, paired=T)
## 
##  Wilcoxon signed rank exact test
## 
## data:  temp$oneWeek and temp$baseline
## V = 31, p-value = 0.107
## alternative hypothesis: true location shift is not equal to 0
temp <- reshape2::dcast(subset(subsetData, Cohort == "anti-PD-1"), formula =  dbCode  ~  TimeCategory, value.var="cTfh_ICOShiCD38hi_CXCR3lo_CCR6hi...FreqParent"  )
wilcox.test(temp$oneWeek, temp$baseline, paired=T)
## Warning in wilcox.test.default(temp$oneWeek, temp$baseline, paired = T): cannot
## compute exact p-value with ties
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  temp$oneWeek and temp$baseline
## V = 28.5, p-value = 0.0437
## alternative hypothesis: true location shift is not equal to 0
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CXCR3hi_CCR6lo...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ X3hi R6lo", xLabel = NULL, yLabel = "X3hiR6lo (%ICOS+CD38+ cTfh)") 

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory != "late")
prePostTime(subsetData, xData = "TimeCategory", yData = "cTfh_ICOShiCD38hi_CXCR3hi_CCR6lo...FreqParent", fillParam = "Cohort", title = "X3hiR6lo", xLabel = "TimeCategory",
            yLabel = "CXCR3hi CCR6lo (% ICOS+CD38+ cTfh)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,10), limits = c(0,70))     +
  ylab(expression(paste( "CXCR", 3^"hi", "CCR", 6^"lo", " (% ICO", S^'+', "CD3", 8^"+", " cTfh)")))
## Warning: Removed 43 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 43 row(s) containing missing values (geom_path).

## Warning: Removed 43 rows containing missing values (geom_point).

# ggsave( filename = "D:/Pembro-Fluvac/Analysis/Images/ICOShiCD38hi_X3hiR6lo_prepostTime.pdf", width=5, height=7)
# write.csv(x = subsetData[,c("dbCode", "Cohort","TimeCategory","cTfh_ICOShiCD38hi_CXCR3hi_CCR6lo...FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1o-3.csv")

temp <- reshape2::dcast(subset(subsetData, Cohort == "Healthy"), formula =  dbCode  ~  TimeCategory, value.var="cTfh_ICOShiCD38hi_CXCR3hi_CCR6lo...FreqParent"  )
wilcox.test(temp$oneWeek, temp$baseline, paired=T)
## 
##  Wilcoxon signed rank exact test
## 
## data:  temp$oneWeek and temp$baseline
## V = 111, p-value = 0.002014
## alternative hypothesis: true location shift is not equal to 0
temp <- reshape2::dcast(subset(subsetData, Cohort == "anti-PD-1"), formula =  dbCode  ~  TimeCategory, value.var="cTfh_ICOShiCD38hi_CXCR3hi_CCR6lo...FreqParent"  )
wilcox.test(temp$oneWeek, temp$baseline, paired=T)
## Warning in wilcox.test.default(temp$oneWeek, temp$baseline, paired = T): cannot
## compute exact p-value with ties
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  temp$oneWeek and temp$baseline
## V = 129, p-value = 0.001755
## alternative hypothesis: true location shift is not equal to 0
## ******************** Tfr analyses ***********************************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_cTfh..._medfi.CD25."))
twoSampleBar(data=subset(subsetData, TimeCategory == "baseline"), xData="Cohort", yData="cTfh_ICOShiCD38hi_cTfh..._medfi.CD25.", fillParam="Cohort", 
             title="medianFI CD25 in ICOS+CD38+ cTfh at baseline", yLabel="medianFI CD25 in ICOS+CD38+ cTfh", position="left")  
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Warning: Removed 26 rows containing missing values (position_quasirandom).

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_cTfh..._medfi.CD25. ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_cTfh..._medfi.CD25. ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -194.397  -59.213   -0.829   55.322  177.784 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            87.22      18.43   4.733 1.08e-05 ***
## Cohortanti-PD-1       116.22      20.08   5.787 1.73e-07 ***
## Year                      NA         NA      NA       NA    
## TimeCategoryoneWeek    10.77      21.55   0.500  0.61862    
## TimeCategorylate       77.82      27.85   2.794  0.00666 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 84.83 on 72 degrees of freedom
##   (80 observations deleted due to missingness)
## Multiple R-squared:  0.4138, Adjusted R-squared:  0.3894 
## F-statistic: 16.94 on 3 and 72 DF,  p-value: 2.003e-08
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_Foxp3hi...FreqParent"))
prePostTimeAveraged(melted, title = "Foxp3+ ICOS+CD38+ cTfh", xLabel = NULL, yLabel = "Foxp3+ (% ICOS+CD38+ cTfh)") # + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory != "late")
prePostTime(subsetData, xData = "TimeCategory", yData = "cTfh_ICOShiCD38hi_Foxp3hi...FreqParent", fillParam = "Cohort", title = "cTfr", xLabel = "TimeCategory",
            yLabel = "Foxp3+ (% ICOS+CD38+ cTfh)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,10), limits = c(0,40))    + 
  ylab(expression(paste("Foxp",3^'+', " (%ICO",S^"+","CD3",8^"+", " cTfh)")))
## Warning: Removed 8 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 8 row(s) containing missing values (geom_path).
## Warning: Removed 8 rows containing missing values (geom_point).

# ggsave( filename = "D:/Pembro-Fluvac/Analysis/Images/ICOShiCD38hi_Foxp3_prepostTime.pdf", width=5, height=7)
# write.csv(x = subsetData[,c("dbCode", "Cohort","TimeCategory","cTfh_ICOShiCD38hi_Foxp3hi...FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1q.csv")
temp <- reshape2::dcast(subset(subsetData, Cohort == "Healthy"), formula =  dbCode  ~  TimeCategory, value.var="cTfh_ICOShiCD38hi_Foxp3hi...FreqParent"  )
wilcox.test(temp$oneWeek, temp$baseline, paired=T)
## Warning in wilcox.test.default(temp$oneWeek, temp$baseline, paired = T): cannot
## compute exact p-value with ties
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  temp$oneWeek and temp$baseline
## V = 80.5, p-value = 0.009465
## alternative hypothesis: true location shift is not equal to 0
temp <- reshape2::dcast(subset(subsetData, Cohort == "anti-PD-1"), formula =  dbCode  ~  TimeCategory, value.var="cTfh_ICOShiCD38hi_Foxp3hi...FreqParent"  )
wilcox.test(temp$oneWeek, temp$baseline, paired=T)
## 
##  Wilcoxon signed rank exact test
## 
## data:  temp$oneWeek and temp$baseline
## V = 29, p-value = 0.0016
## alternative hypothesis: true location shift is not equal to 0
##  different denominator
subsetData$cTfh_ICOShiCD38hi_Foxp3hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_Foxp3hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent /100
prePostTime(subsetData, xData = "TimeCategory", yData = "cTfh_ICOShiCD38hi_Foxp3hi...FreqParent", fillParam = "Cohort", title = "cTfr", xLabel = "TimeCategory",
            yLabel = "Foxp3+ICOS+CD38+ (% CXCR5+)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,1), limits=c(0,3))    + 
  ylab(expression(paste("Foxp",3^'+', "ICO",S^"+","CD3",8^"+", " (% CXCR",5^"+",")")))
## Warning: Removed 8 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 8 row(s) containing missing values (geom_path).

## Warning: Removed 8 rows containing missing values (geom_point).

# ggsave( filename = "D:/Pembro-Fluvac/Analysis/Images/ICOShiCD38hi_Foxp3_prepostTime_freqX5.pdf", width=5, height=7)
temp <- reshape2::dcast(subset(subsetData, Cohort == "Healthy"), formula =  dbCode  ~  TimeCategory, value.var="cTfh_ICOShiCD38hi_Foxp3hi...FreqParent"  )
wilcox.test(temp$oneWeek, temp$baseline, paired=T)
## 
##  Wilcoxon signed rank exact test
## 
## data:  temp$oneWeek and temp$baseline
## V = 173, p-value = 0.714
## alternative hypothesis: true location shift is not equal to 0
temp <- reshape2::dcast(subset(subsetData, Cohort == "anti-PD-1"), formula =  dbCode  ~  TimeCategory, value.var="cTfh_ICOShiCD38hi_Foxp3hi...FreqParent"  )
wilcox.test(temp$oneWeek, temp$baseline, paired=T)
## 
##  Wilcoxon signed rank exact test
## 
## data:  temp$oneWeek and temp$baseline
## V = 93, p-value = 0.4524
## alternative hypothesis: true location shift is not equal to 0
# this suggests the cTfr differences are not due to global differences in Foxp3 expression but rather shifts within the CXCR5+ compartment

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_Foxp3hi...FreqParent"))
temp <- reshape2::dcast(melted, dbCode + Cohort  ~ TimeCategory, value.var = 'value'); #  temp <- temp[, -which(names(temp) == "late")]
temp$FC <- temp$oneWeek / temp$baseline
temp <- temp[-which(is.na(temp$FC)), ]
twoSampleBar(data = temp, xData = "Cohort", yData = "FC", fillParam="Cohort", title="cTfr", yLabel = "Fold-change in \nFoxp3+ ICOS+CD38+ cTfh", 
             nonparam = T) + ylab(expression(paste("Fold-change in cTfr")))  
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

# ggsave( filename = "D:/Pembro-Fluvac/Analysis/Images/ICOShiCD38hi_Foxp3_fold-change.pdf", width=5, height=8)
wilcox_test(temp, FC ~ Cohort)
## # A tibble: 1 x 7
##   .y.   group1  group2       n1    n2 statistic       p
## * <chr> <chr>   <chr>     <int> <int>     <dbl>   <dbl>
## 1 FC    Healthy anti-PD-1    27    21       428 0.00223
# write.csv(x = temp[,c("dbCode", "Cohort","FC")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1r.csv")

———– PB response and correlations with cTfh response ——————–

## ***************    CD27++CD38++     ********************

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBar(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: Plasmablast at d7", yLabel="CD19+CD27++CD38++ frequency at d7")
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Warning: Removed 18 rows containing missing values (position_quasirandom).

subsetData <- subset(mergedData, TimeCategory != "late"  & Cohort != "nonPD1")
FC_PBresponse <- dcast(subsetData, `dbCode`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent")); 
FC_PBresponse$FC <- FC_PBresponse$`oneWeek`/FC_PBresponse$`baseline`
twoSampleBar(data=FC_PBresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="Plasmablasts", yLabel="Fold-change at one week",
             FCplot=T) +   scale_y_continuous(breaks=seq(0,99,1))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Warning: Removed 27 rows containing missing values (position_quasirandom).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/PBresponse_foldchange_AllYrs.pdf", device="pdf", width=4.1, height=7)
# write.csv(x = FC_PBresponse, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e2b.csv")



oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
univScatter(data = oneWeek, yData = "FCPB_oW", xData = "FCtfh_oW", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs CD27++CD38++ at oneWeek", yLabel= "CD27++CD38++ (freq IgD-)", xLabel = "ICOS+CD38+ cTfh frequency")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).

subsetData1 <- subset(oneWeek, Cohort == "Healthy" )    
subsetData2 <- subset(oneWeek, Cohort == "anti-PD-1")    
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "anti-PD-1", yData = "FCPB_oW", xData="FCtfh_oW", 
           fillParam = "Cohort", title = "Cohort 2 - One Week", yLabel= "Fold-change CD27++CD38++ PB", xLabel = "Fold-change ICOS+CD38+ cTfh", nonparam = T, 
           statsOff = F) + 
  scale_x_continuous(breaks=seq(0,99,1),limits=c(0,7)) + scale_y_continuous(breaks=seq(0,99,2),limits=c(0,15)) + 
  xlab(expression(paste("Fold-change in ICO",S^'+',"CD3",8^'+'," cTfh"))) + ylab(expression(paste("Fold-change in Plasmablasts"))) + 
  theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank())
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "anti-PD-1", yData = "FCPB_oW", xData="FCtfh_oW", 
           fillParam = "Cohort", title = "Cohort 2 - One Week", yLabel= "Fold-change CD27++CD38++ PB", xLabel = "Fold-change ICOS+CD38+ cTfh", nonparam = T, 
           statsOff = T) + 
  scale_x_continuous(breaks=seq(0,99,1),limits=c(0,7)) + scale_y_continuous(breaks=seq(0,99,2),limits=c(0,15)) + 
  xlab(expression(paste("Fold-change in ICO",S^'+',"CD3",8^'+'," cTfh"))) + ylab(expression(paste("Fold-change in Plasmablasts"))) + 
  theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank())
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-vs-PB_foldchange.pdf", device="pdf", width=8, height=6)
temp <- rbind(subsetData1,subsetData2); names(temp)[grep(pattern="FCPB_oW",names(temp))] <- "Fold-change in plasmablasts"; names(temp)[grep(pattern = "FCtfh_oW",names(temp))] <- "Fold-change in ICOS+CD38+ cTfh"
# write.csv(x = temp[,c("dbCode", "Cohort","Fold-change in plasmablasts","Fold-change in ICOS+CD38+ cTfh")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/1f-2.csv")



## ***************    CD71+ CD20lo ASC    ********************
baseline <- subset(mergedData, TimeCategory == "baseline")
baseline$IgDlo_CD71hi_ActBCells...FreqParent <-  baseline$IgDlo_CD71hi_ActBCells...FreqParent * baseline$IgDlo_CD71hi_..FreqParent * baseline$CD19hi_NotNaiveB_freqParent / 10000
baseline$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  baseline$IgDlo_CD71hi_CD20loCD71hi...FreqParent * baseline$IgDlo_CD71hi_..FreqParent * baseline$CD19hi_NotNaiveB_freqParent / 10000

subsetData1 <- subset(baseline, Cohort == "Healthy"   )   
subsetData2 <- subset(baseline, Cohort == "anti-PD-1"  )      
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "anti-PD-1", yData = "IgDlo_CD71hi_ActBCells...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "ABC at baseline", yLabel= "ABC (% CD19)", xLabel = "ICOS+CD38+ (% cTfh)", nonparam=T) + 
  coord_cartesian(xlim=c(0,7.5),ylim=c(0,3)) + xlab(expression(paste("ICO",S^"+","CD3",8^"+"," (% cTfh)")))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d0_vs_ABC-71hi-20hi-d0_freqCD19_biv.pdf", device="pdf")
temp <- rbind(subsetData1,subsetData2); 
# write.csv(x = temp[,c("dbCode", "Cohort","IgDlo_CD71hi_ActBCells...FreqParent","cTfh_ICOShiCD38hi_..FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4q-1.csv")


subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline" & IgDlo_CD71hi_ActBCells...FreqParent > 0 & IgDlo_CD71hi_CD20loCD71hi...FreqParent >0)
twoSampleBar(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_CD20loCD71hi...FreqParent", fillParam="Cohort", title="baseline ASC", 
             yLabel="CD20- ASC (% CD71+)")+ scale_y_continuous(limits=c(0,100), breaks=seq(0,100,10))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/ASC_atBaseline_barPlot.pdf", width=4)
# sourceData exported as part of e4p.csv
# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000
subsetData1 <- subset(oneWeek, Cohort == "Healthy" )   
subsetData2 <- subset(oneWeek, Cohort == "anti-PD-1"  )    
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "anti-PD-1", yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "cTfh vs CD20lo response at oneWeek", yLabel= "ASC (% CD19)", xLabel = "ICOS+CD38+ (% cTfh)", nonparam=T) + 
  coord_cartesian(xlim = c(0,11.2), ylim=c(0,3)) + xlab(expression(paste("ICO",S^"+","CD3",8^"+"," (% cTfh)")))+ theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank())
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

 # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ASC-71hi-20lo-d7_freqCD19_biv.pdf", device="pdf")
temp <- rbind(subsetData1,subsetData2); 
# write.csv(x = temp[,c("dbCode", "Cohort","IgDlo_CD71hi_CD20loCD71hi...FreqParent","cTfh_ICOShiCD38hi_..FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4r-1.csv")

subsetData <- subset(mergedData, Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent != 0 )   # exclude zero values due to presumptive technical artifact
prePostTime(subsetData, xData = "TimeCategory", yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", fillParam = "Cohort", title = "ASC response by subject", 
            xLabel = "TimeCategory", yLabel = "CD20- ASC (% IgD-CD71+)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,150,5))     
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

## ***************    CD71+ CD20hi ABC    ********************
baseline <- subset(mergedData, TimeCategory == "baseline")
baseline$IgDlo_CD71hi_ActBCells...FreqParent <-  baseline$IgDlo_CD71hi_ActBCells...FreqParent * baseline$IgDlo_CD71hi_..FreqParent * baseline$CD19hi_NotNaiveB_freqParent / 10000
baseline$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  baseline$IgDlo_CD71hi_CD20loCD71hi...FreqParent * baseline$IgDlo_CD71hi_..FreqParent * baseline$CD19hi_NotNaiveB_freqParent / 10000
subsetData1 <- subset(baseline, Cohort == "Healthy"   )   
subsetData2 <- subset(baseline, Cohort == "anti-PD-1"  )      
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "anti-PD-1", yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "ASC at baseline", yLabel= "ASC (% CD19)", xLabel = "ICOS+CD38+ (% cTfh)", nonparam=T) + 
  coord_cartesian(xlim=c(0,7.5),ylim=c(0,3)) + xlab(expression(paste("ICO",S^"+","CD3",8^"+"," (% cTfh)")))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 14 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d0_vs_ASC-71hi-20lo-d0_freqCD19_biv.pdf", device="pdf")
temp <- rbind(subsetData1,subsetData2); 
# write.csv(x = temp[,c("dbCode", "Cohort","IgDlo_CD71hi_CD20loCD71hi...FreqParent","cTfh_ICOShiCD38hi_..FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4q-2.csv")


subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline" & IgDlo_CD71hi_ActBCells...FreqParent > 0 & IgDlo_CD71hi_CD20loCD71hi...FreqParent >0)
twoSampleBar(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_ActBCells...FreqParent", fillParam="Cohort", title="baseline ABC", 
             yLabel="CD20+ ABC (% CD71+)") + scale_y_continuous(limits=c(0,100), breaks=seq(0,100,10))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/ABC_atBaseline_barPlot.pdf", width=4)
# write.csv(x = subsetData[,c("Cohort", "IgDlo_CD71hi_ActBCells...FreqParent", "IgDlo_CD71hi_CD20loCD71hi...FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4p.csv")
# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$IgDlo_CD71hi_ActBCells...FreqParent <-  oneWeek$IgDlo_CD71hi_ActBCells...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000

subsetData1 <- subset(oneWeek, Cohort == "Healthy"   )   
subsetData2 <- subset(oneWeek, Cohort == "anti-PD-1"  )      
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "anti-PD-1", yData = "IgDlo_CD71hi_ActBCells...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "cTfh vs ABC at oneWeek", yLabel= "ABC (% CD19)", xLabel = "ICOS+CD38+ (% cTfh)", nonparam=T) + 
  coord_cartesian(xlim=c(0,11.2),ylim=c(0,3)) + xlab(expression(paste("ICO",S^"+","CD3",8^"+"," (% cTfh)")))
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

 # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ABC-71hi-20hi-d7_freqCD19_biv.pdf", device="pdf")
temp <- rbind(subsetData1,subsetData2); 
# write.csv(x = temp[,c("dbCode", "Cohort","IgDlo_CD71hi_CD20loCD71hi...FreqParent","cTfh_ICOShiCD38hi_..FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4r-2.csv")


subsetData <- subset(mergedData, Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent != 0 )   # exclude zero values due to presumptive technical artifact
prePostTime(subsetData, xData = "TimeCategory", yData = "IgDlo_CD71hi_ActBCells...FreqParent", fillParam = "Cohort", title = "ABC response by subject", 
            xLabel = "TimeCategory", yLabel = "CD20+ ABC (% IgD-CD71+)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,150,5)) 
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline" & IgDlo_CD71hi_ActBCells...FreqParent > 0 & IgDlo_CD71hi_CD20loCD71hi...FreqParent >0)
twoSampleBar(data=subsetData, xData="Cohort", yData="ASC_ABCratio", fillParam="Cohort", title="baseline ASC/ABC ratio", 
             yLabel="ASC/ABC ratio") + scale_y_continuous(limits=c(0,8), breaks=seq(0,100,1))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

# ************** correlations to Tfh responses *********************
data1 = mergedData[which(mergedData$Cohort == "Healthy"),]; data2 = mergedData[ which(mergedData$Cohort == "anti-PD-1") ,]
bivScatter(data1 = data1, data2 = data2, name1 = "Healthy", name2 = "anti-PD-1", xData = "Age", yData = "FCtfh_oW", fillParam = "Cohort", title = "FC Tfh vs Age", 
           xLabel = "Age", yLabel = "ICOS+CD38+ cTfh fold-change", nonparam=T) + scale_y_continuous(limits=c(0,12))
## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(data2[, xData], data2[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 54 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 55 rows containing non-finite values (stat_smooth).
## Warning: Removed 54 rows containing missing values (geom_point).
## Warning: Removed 55 rows containing missing values (geom_point).

bivScatter(data1 = data1, data2 = data2, name1 = "Healthy", name2 = "anti-PD-1", xData = "Age", yData = "FCPB_oW", fillParam = "Cohort", title = "FC PB vs Age", 
           xLabel = "Age", yLabel = "CD27++CD38++ pB fold-change", nonparam=T) + scale_y_continuous(limits=c(0,12))
## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(data2[, xData], data2[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 67 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 59 rows containing non-finite values (stat_smooth).
## Warning: Removed 67 rows containing missing values (geom_point).
## Warning: Removed 59 rows containing missing values (geom_point).
## Warning: Removed 33 rows containing missing values (geom_smooth).

bivScatter(data1 = data1, data2 = data2, name1 = "Healthy", name2 = "anti-PD-1", xData = "Age", yData = "FCCXCL13_oW", fillParam = "Cohort", title = "FC CXCL13 vs Age", 
           xLabel = "Age", yLabel = "Plasma CXCL13 fold-change", nonparam=T) + scale_y_continuous(limits=c(0,5))
## Warning in cor.test.default(data1[, xData], data1[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## Warning in cor.test.default(data2[, xData], data2[, yData], method = "kendall"):
## Cannot compute exact p-value with ties
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 54 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 55 rows containing non-finite values (stat_smooth).
## Warning: Removed 54 rows containing missing values (geom_point).
## Warning: Removed 55 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort == 'Healthy' & TimeCategory == "baseline")
subsetData <- subsetData[,grep( paste(c("FCtfh_oW","FCPB_oW", "FCCXCL13_oW","Age"), collapse = "|"), names(subsetData))]  
cor.matrix <- cor(subsetData, method="kendall" , use="pairwise.complete.obs" )
cor.matrix.pmat <- ggcorrplot::cor_pmat(subsetData, method="kendall", use="pairwise.complete.obs"  )
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix <- as.data.frame(cor.matrix); cor.matrix$Labels <- row.names(cor.matrix); cor.matrix$Cohort <- "Healthy"
cor.matrix <- merge( x = cor.matrix, y = cor.matrix.pmat[,"Age"], by = "row.names"); names(cor.matrix)[grep("y",names(cor.matrix))] <- "Pvalue"

subsetData <- subset(mergedData, Cohort == 'anti-PD-1' & TimeCategory == "baseline")
subsetData <- subsetData[,grep( paste(c("FCtfh_oW","FCPB_oW", "FCCXCL13_oW","Age"), collapse = "|"), names(subsetData))]  
cor.matrix2 <- cor(subsetData, method="kendall" , use="pairwise.complete.obs" )
cor.matrix2.pmat <- ggcorrplot::cor_pmat(subsetData, method="kendall", use="pairwise.complete.obs"  )
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix2 <- as.data.frame(cor.matrix2); cor.matrix2$Labels <- row.names(cor.matrix2); cor.matrix2$Cohort <- "anti-PD-1"
cor.matrix2 <- merge( x = cor.matrix2, y = cor.matrix2.pmat[,"Age"], by = "row.names"); names(cor.matrix2)[grep("^y",names(cor.matrix2))] <- "Pvalue"

temp <- as.data.frame(rbind(cor.matrix, cor.matrix2)); temp <- temp[ -grep( paste( c("Age"), collapse = "|"), temp$Row.names),]

temp$Labels <- str_replace(temp$Labels, pattern = "FCCXCL13_oW", replacement = "Fold-change \n plasma CXCL13\nat 1 week" )
temp$Labels <- str_replace(temp$Labels, pattern = "FCtfh_oW", replacement = "Fold-change \n ICOS+CD38+ cTfh\nat 1 week" )
temp$Labels <- str_replace(temp$Labels, pattern = "FCPB_oW", replacement = "Fold-change \n plasmablasts\nat 1 week" )
temp$Labels <- factor(temp$Labels, levels = c("Fold-change \n plasma CXCL13\nat 1 week" , "Fold-change \n plasmablasts\nat 1 week","Fold-change \n ICOS+CD38+ cTfh\nat 1 week"))
ggplot( data = temp, aes(x = Labels,y = Age, fill = Cohort)) + geom_bar(stat='identity',position = position_dodge(width=0.5), width=0.05, color="black", size=0.1) + 
  geom_point(aes(fill=Cohort, size=Pvalue), pch=21, color="black", stroke=0.2, position = position_dodge(width=0.5)) + 
  theme_bw() + scale_size(range = c(18,1), breaks = c(0,0.05,0.1,0.3,0.7), limits = c(0,0.8), trans = 'sqrt') + guides(size = guide_legend(reverse=TRUE)) + 
  scale_fill_manual(values=c("#FFB18C", "#7FAEDB")) + ylab("Kendall's tau vs\nAge") + xlab(" ") + ggtitle(" ") + geom_hline(yintercept=0, linetype = "dashed") +
  theme(axis.text.y = element_text(size = 28, color="black"), plot.title = element_text(size=28), axis.text.x = element_text(size=22, color="black", angle=45, hjust=1,vjust=1), 
        axis.title.x = element_text(size=28, color="black"), legend.text = element_text(size=22), legend.title = element_text(size=22)) + 
  coord_flip() + scale_y_continuous(limits = c(-1,0.5), breaks = seq(-1,1,0.25))+ theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank()) 
## Warning: Removed 3 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/Fold-changes_Age_correlations.pdf", device = "pdf", width=8)
# write.csv(x = temp, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e1n.csv")



cTfhCorrel <- function( data, subset )
{
  z <- vector()
  z[1] <- subset
  z[2] <- round(cor(data[ which(data$Cohort == "Healthy"), subset], data[ which(data$Cohort == "Healthy") ,"cTfh_ICOShiCD38hi_..FreqParent"], 
                    method = "kendall", use = "complete.obs"), 2)
  z[3] <- round(cor(data[ which(data$Cohort == "anti-PD-1"), subset], data[ which(data$Cohort == "anti-PD-1"),"cTfh_ICOShiCD38hi_..FreqParent"], 
                    method = "kendall", use = "complete.obs"), 2)
  return(z)
}
result <- data.frame(CellType = 0, Healthy = 0, aPD1 = 0)
result[1,] <- cTfhCorrel(baseline, subset = "IgDlo_CD71hi_ActBCells...FreqParent")
result[2,] <- cTfhCorrel(baseline, subset = "IgDlo_CD71hi_CD20loCD71hi...FreqParent")
result$CellType <- str_replace(result$CellType, "IgDlo_CD71hi_ActBCells...FreqParent", "ABC \n(%CD19)")
result$CellType <- str_replace(result$CellType, "IgDlo_CD71hi_CD20loCD71hi...FreqParent", "ASC \n(%CD19)")
result <- melt(result, id.vars = "CellType", measure.vars = c("Healthy","aPD1")); result$value <- as.numeric(result$value)
result$CellType <- factor(result$CellType, levels = c("ASC \n(%CD19)",  "ABC \n(%CD19)"))
ggplot(result, aes(x=CellType, fill=variable, color="bl")) + geom_bar(aes(y = value), stat='identity', position="dodge" ) + theme_bw()  + 
  scale_fill_manual(values=c("#7FAEDB", "#FFB18C")) + scale_color_manual(values="black") + ggtitle(expression(paste("cTfh correlation"))) + 
  ylab("Kendall t") + 
  theme(axis.text.x = element_text(angle = 0), axis.title = element_text(size=28,hjust = 0.5), plot.title = element_text(size=28,hjust = 0.5), 
        axis.title.x = element_blank(), axis.text = element_text(size=22, color="black"), legend.position = "none") + 
  scale_y_continuous(breaks = seq(-1,1,0.1), limits=c(-0.2,0.6)) + theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank()) 

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d0_vs_various-d0_pearsons_freqCD19.pdf", device = "pdf", width=4, height=6)
# write.csv(x = result, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4q-3.csv")



oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$IgDlo_CD71hi_ActBCells...FreqParent <-  oneWeek$IgDlo_CD71hi_ActBCells...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000
oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000

result <- data.frame(CellType = 0, Healthy = 0, aPD1 = 0)
result[1,] <- cTfhCorrel(oneWeek, subset = "IgDlo_CD71hi_ActBCells...FreqParent")
result[2,] <- cTfhCorrel(oneWeek, subset = "IgDlo_CD71hi_CD20loCD71hi...FreqParent")
result$CellType <- str_replace(result$CellType, "IgDlo_CD71hi_ActBCells...FreqParent", "ABC \n(% CD19)")
result$CellType <- str_replace(result$CellType, "IgDlo_CD71hi_CD20loCD71hi...FreqParent", "ASC \n(% CD19)")
result <- melt(result, id.vars = "CellType", measure.vars = c("Healthy","aPD1")); result$value <- as.numeric(result$value)
result$CellType <- factor(result$CellType, levels = c("ASC \n(% CD19)",  "ABC \n(% CD19)"))
ggplot(result, aes(x=CellType, fill=variable, color="bl")) + geom_bar(aes(y = value), stat='identity', position="dodge" ) + theme_bw()  + 
  scale_fill_manual(values=c("#7FAEDB", "#FFB18C")) + scale_color_manual(values="black") + ggtitle("cTfh correlation") + 
  ylab("Kendall t") + 
  theme(axis.text.x = element_text(angle = 0), axis.title = element_text(size=28,hjust = 0.5), plot.title = element_text(size=28,hjust = 0.5), 
        axis.title.x = element_blank(), axis.text = element_text(size=22, color="black"), legend.position = "none") + 
  scale_y_continuous(breaks = seq(-1,1,0.1), limits=c(-0.2,0.6))+ theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank()) 

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_various-d7_pearsons_freqCD19.pdf", device = "pdf", width=4, height=6)
# write.csv(x = result, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4r-3.csv")

———– CXCL13 ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort','Year'), measure.vars = c("Plasma.CXCL13..pg.mL."))
prePostTimeAveraged(melted, title = "CXCL13", xLabel = NULL, yLabel = "Plasma CXCL13 (pg/mL)") + scale_y_continuous(breaks=seq(0,200,5), limits=c(50,110))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_TimeAveraged.pdf", device="pdf", width=4)
summary( fit <- aov(value ~ Cohort+TimeCategory + Cohort:TimeCategory, data=melted ) );  tukey_hsd(fit)
##                      Df Sum Sq Mean Sq F value  Pr(>F)   
## Cohort                1   1988    1988   3.289 0.07178 . 
## TimeCategory          2   7206    3603   5.961 0.00324 **
## Cohort:TimeCategory   2   4476    2238   3.702 0.02698 * 
## Residuals           148  89458     604                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high   p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
##  1 Coho~ Healt~ anti-~          0   7.20     -0.645     15.0  0.0718 
##  2 Time~ basel~ oneWe~          0  14.6       3.18      26.1  0.00824
##  3 Time~ basel~ late            0  -0.376   -11.7       10.9  0.997  
##  4 Time~ oneWe~ late            0 -15.0     -26.8       -3.20 0.00865
##  5 Coho~ Healt~ anti-~          0   3.14    -15.7       22.0  0.997  
##  6 Coho~ Healt~ Healt~          0   5.22    -14.1       24.5  0.97   
##  7 Coho~ Healt~ anti-~          0  29.5       8.60      50.5  0.00105
##  8 Coho~ Healt~ Healt~          0   1.41    -17.9       20.7  1      
##  9 Coho~ Healt~ anti-~          0   0.0782  -20.1       20.2  1      
## 10 Coho~ anti-~ Healt~          0   2.09    -16.7       20.9  1      
## 11 Coho~ anti-~ anti-~          0  26.4       5.91      46.9  0.00377
## 12 Coho~ anti-~ Healt~          0  -1.73    -20.6       17.1  1      
## 13 Coho~ anti-~ anti-~          0  -3.06    -22.7       16.6  0.998  
## 14 Coho~ Healt~ anti-~          0  24.3       3.37      45.3  0.0128 
## 15 Coho~ Healt~ Healt~          0  -3.81    -23.1       15.5  0.993  
## 16 Coho~ Healt~ anti-~          0  -5.15    -25.3       15.0  0.977  
## 17 Coho~ anti-~ Healt~          0 -28.1     -49.1       -7.19 0.00215
## 18 Coho~ anti-~ anti-~          0 -29.5     -51.2       -7.76 0.00185
## 19 Coho~ Healt~ anti-~          0  -1.33    -21.5       18.8  1      
## # ... with 1 more variable: p.adj.signif <chr>
# write.csv(melted, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/1g.csv")

subsetData <- subset(melted, TimeCategory == "oneWeek")
rownames(subsetData) <- seq(1:nrow(subsetData))
twoSampleBar(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="CXCL13\nOne week", yLabel="plasma CXCL13 (pg/mL)") + 
  scale_y_continuous(limits = c(0,250))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Warning: Removed 1 rows containing missing values (position_quasirandom).

aggregate( subsetData, by= list(subsetData$variable, subsetData$TimeCategory, subsetData$Cohort), FUN=mean, na.rm = T)
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
##                 Group.1 Group.2   Group.3 dbCode TimeCategory Cohort     Year
## 1 Plasma.CXCL13..pg.mL. oneWeek   Healthy     NA           NA     NA 2.555556
## 2 Plasma.CXCL13..pg.mL. oneWeek anti-PD-1     NA           NA     NA 2.666667
##   variable    value
## 1       NA 61.04413
## 2       NA 85.35924
FC_response <- dcast( subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" ), `Subject`+`Cohort`~`TimeCategory`, value.var = c("Plasma.CXCL13..pg.mL.")) 
FC_response$FCcxcl13 <- FC_response$oneWeek / FC_response$baseline
t.test(FC_response[which(FC_response$Cohort == "anti-PD-1"), "baseline"], FC_response[which(FC_response$Cohort == "anti-PD-1"), "oneWeek"], paired = T)
## 
##  Paired t-test
## 
## data:  FC_response[which(FC_response$Cohort == "anti-PD-1"), "baseline"] and FC_response[which(FC_response$Cohort == "anti-PD-1"), "oneWeek"]
## t = -5.5658, df = 19, p-value = 2.283e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -32.51086 -14.74144
## sample estimates:
## mean of the differences 
##               -23.62615
t.test(FC_response[which(FC_response$Cohort == "Healthy"), "baseline"], FC_response[which(FC_response$Cohort == "Healthy"), "oneWeek"], paired = T)
## 
##  Paired t-test
## 
## data:  FC_response[which(FC_response$Cohort == "Healthy"), "baseline"] and FC_response[which(FC_response$Cohort == "Healthy"), "oneWeek"]
## t = -1.4723, df = 26, p-value = 0.1529
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -12.518866   2.069751
## sample estimates:
## mean of the differences 
##               -5.224558
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBar(data=subsetData, xData="Cohort", yData="FCCXCL13_oW", fillParam="Cohort",FCplot =T, 
             title="CXCL13", yLabel="Fold-change at one week")+ scale_y_continuous(limits=c(0,3), breaks = seq(0,50,0.5))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Warning: Removed 1 rows containing missing values (position_quasirandom).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_foldChange_oW.pdf", width = 4)
# write.csv(subsetData[,c("dbCode","Cohort","FCCXCL13_oW")], file="D:/Pembro-FluVac/Analysis/sourceDataExports/1h.csv")

subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "Plasma.CXCL13..pg.mL.", fillParam = "Cohort", title = "CXCL13", xLabel = "TimeCategory",
            yLabel = "Plasma CXCL13 (pg/mL)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,20))     # limits = c(0,45)
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_prePostTime.pdf", device="pdf")
# write.csv(subsetData[,c("dbCode","Cohort", "TimeCategory","Plasma.CXCL13..pg.mL.")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e2e.csv")
univScatter(data = subset(mergedData, TimeCategory == "baseline" & Cohort == "anti-PD-1"), xData = "Plasma.CXCL13..pg.mL.",  fillParam = "dummy", position = 'right',
            yData = "FCCXCL13_oW", xLabel = "baseline CXCL13 (pg/mL)", yLabel = "Fold-change CXCL13", title = "CXCL13 in anti-PD-1") + scale_fill_manual(values=c("#FFB18C"))+
  scale_y_continuous(breaks = seq(0,5,1),limits=c(0,3))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

summary(lm( Plasma.CXCL13..pg.mL. ~ Cohort + Year + TimeCategory, data=mergedData))
## 
## Call:
## lm(formula = Plasma.CXCL13..pg.mL. ~ Cohort + Year + TimeCategory, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.786 -14.205  -2.952   5.447 146.105 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          42.0682     8.8791   4.738 4.69e-06 ***
## Cohortanti-PD-1       8.1888     3.9491   2.074  0.03970 *  
## CohortnonPD1          6.7758     7.2866   0.930  0.35381    
## Year                  4.4333     3.2063   1.383  0.16866    
## TimeCategoryoneWeek  14.2214     4.6535   3.056  0.00262 ** 
## TimeCategorylate     -0.3203     4.5067  -0.071  0.94343    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.38 on 162 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.1008, Adjusted R-squared:  0.0731 
## F-statistic: 3.634 on 5 and 162 DF,  p-value: 0.003835
summary(lm( FCCXCL13_oW ~ Cohort + FCPB_oW + FChai_late, data=mergedData))
## 
## Call:
## lm(formula = FCCXCL13_oW ~ Cohort + FCPB_oW + FChai_late, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8013 -0.2238 -0.1329  0.1885  1.1130 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.88302    0.12836   6.879 1.41e-09 ***
## Cohortanti-PD-1  0.34844    0.10778   3.233  0.00181 ** 
## FCPB_oW         -0.02106    0.02158  -0.976  0.33234    
## FChai_late       0.26501    0.10193   2.600  0.01118 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4254 on 77 degrees of freedom
##   (92 observations deleted due to missingness)
## Multiple R-squared:  0.2844, Adjusted R-squared:  0.2565 
## F-statistic:  10.2 on 3 and 77 DF,  p-value: 9.887e-06
subsetData <- subset(mergedData, Cohort == 'Healthy' & TimeCategory == "baseline")
subsetData <- subsetData[,grep( paste(c("FCtfh_oW","FCPB_oW", "FCCXCL13_oW","FCH1N1_hai_late","FCH3N2_hai_late","FCFluB_hai_late"), collapse = "|"), names(subsetData))]  
cor.matrix <- cor(subsetData, method="kendall" , use="pairwise.complete.obs" )
cor.matrix.pmat <- ggcorrplot::cor_pmat(subsetData, method="kendall", use="pairwise.complete.obs"  )
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix <- as.data.frame(cor.matrix); cor.matrix$Labels <- row.names(cor.matrix); cor.matrix$Cohort <- "Healthy"
cor.matrix <- merge( x = cor.matrix, y = cor.matrix.pmat[,"FCCXCL13_oW"], by = "row.names"); names(cor.matrix)[grep("y",names(cor.matrix))] <- "Pvalue"

subsetData <- subset(mergedData, Cohort == 'anti-PD-1' & TimeCategory == "baseline")
subsetData <- subsetData[,grep( paste(c("FCtfh_oW","FCPB_oW", "FCCXCL13_oW","FCH1N1_hai_late","FCH3N2_hai_late","FCFluB_hai_late"), collapse = "|"), names(subsetData))]  
cor.matrix2 <- cor(subsetData, method="kendall" , use="pairwise.complete.obs" )
cor.matrix2.pmat <- ggcorrplot::cor_pmat(subsetData, method="kendall", use="pairwise.complete.obs"  )
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix2 <- as.data.frame(cor.matrix2); cor.matrix2$Labels <- row.names(cor.matrix2); cor.matrix2$Cohort <- "anti-PD-1"
cor.matrix2 <- merge( x = cor.matrix2, y = cor.matrix2.pmat[,"FCCXCL13_oW"], by = "row.names"); names(cor.matrix2)[grep("^y",names(cor.matrix2))] <- "Pvalue"

temp <- as.data.frame(rbind(cor.matrix, cor.matrix2)); temp <- temp[ -grep( paste( c("FCCXCL13"), collapse = "|"), temp$Row.names),]
temp$Labels <- str_replace(temp$Labels, pattern = "FCtfh_oW", replacement = "Fold-change\n ICOS+CD38+ cTfh\nat 1 week" )
temp$Labels <- str_replace(temp$Labels, pattern = "FCPB_oW", replacement = "Fold-change\n plasmablasts\nat 1 week" )
temp$Labels <- str_replace(temp$Labels, pattern = "FCH1N1_hai_late", replacement = "Fold-change\n H1N1 HAI\nat late" )
temp$Labels <- str_replace(temp$Labels, pattern = "FCH3N2_hai_late", replacement = "Fold-change\n H3N2 HAI\nat late" )
temp$Labels <- str_replace(temp$Labels, pattern = "FCFluB_hai_late", replacement = "Fold-change\n FluB HAI\nat late" )
temp$Labels <- factor(temp$Labels, levels = c("Fold-change\n FluB HAI\nat late","Fold-change\n H3N2 HAI\nat late","Fold-change\n H1N1 HAI\nat late" ,
                                              "Fold-change\n plasmablasts\nat 1 week", "Fold-change\n ICOS+CD38+ cTfh\nat 1 week"))
ggplot( data = temp, aes(x = Labels,y = FCCXCL13_oW, fill = Cohort)) + geom_bar(stat='identity',position = position_dodge(width=0.5), width=0.05, color="black", size=0.1) + 
  geom_point(aes(fill=Cohort, size=Pvalue), pch=21, color="black", stroke=0.2, position = position_dodge(width=0.5)) + 
  theme_bw() + scale_size(range = c(18,1), breaks = c(0,0.05,0.1,0.3,0.7), limits = c(0,0.8), trans = 'sqrt') + guides(size = guide_legend(reverse=TRUE)) + 
  scale_fill_manual(values=c("#FFB18C", "#7FAEDB")) + ylab("Kendall's tau vs\nFold-change in CXCL13") + xlab(" ") + ggtitle(" ") + geom_hline(yintercept=0, linetype = "dashed") +
  theme(axis.text.y = element_text(size = 28, color="black"), plot.title = element_text(size=28), axis.text.x = element_text(size=22, color="black", angle=45, hjust=1,vjust=1), 
        axis.title.x = element_text(size=28, color="black"), legend.text = element_text(size=22), legend.title = element_text(size=22)) + theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank())+ 
  coord_flip() + scale_y_continuous(limits = c(-1,1), breaks = seq(-1,1,0.25))
## Warning: Removed 2 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/Fold-changes_CXCL13_correlations.pdf", device = "pdf", width=9, height=10)
# write.csv(temp, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e2h.csv")

———– Vaccine response determinants and biomarkers ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
  a<-twoSampleBar(data=subsetData, xData="Cohort", yData="H1N1pdm09.HAI.titer", fillParam="Cohort", batch="none", title="Baseline - H1N1", yLabel="H1N1 titer", nonparam=T) +
  scale_y_continuous(trans='log2', breaks=c(2^(2:14) ) ) +  coord_cartesian(ylim = c(4,8192))
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAItiter_late_byCohort.pdf", device="pdf", width=4.5)
  b<-twoSampleBar(data=subsetData, xData="Cohort", yData="H3N2.HAI.titer", fillParam="Cohort", batch="none", title="Baseline - H3N2", yLabel="H3N2 titer", nonparam=T) +
  scale_y_continuous(trans='log2', breaks=c(2^(2:14) ) ) +  coord_cartesian(ylim = c(4,8192))
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAItiter_late_byCohort.pdf", device="pdf", width=4.5)
  c<-twoSampleBar(data=subsetData, xData="Cohort", yData="FluB.HAI.titer", fillParam="Cohort", batch="none", title="Baseline - FluB", yLabel="FluB titer", nonparam=T) +
  scale_y_continuous(trans='log2', breaks=c(2^(2:14) ) ) +  coord_cartesian(ylim = c(4,8192))
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAItiter_late_byCohort.pdf", device="pdf", width=4.5)
grid.arrange(a,b,c,nrow=1)
## Warning: Removed 12 rows containing missing values (position_quasirandom).

## Warning: Removed 12 rows containing missing values (position_quasirandom).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAI_baselinetiters_3strains.pdf", arrangeGrob(a,b,c,nrow=1), width=14, height=8)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","H1N1pdm09.HAI.titer", "H3N2.HAI.titer", "FluB.HAI.titer")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e3b.csv")

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
subsetData %>% group_by(Cohort) %>% get_summary_stats(FCH1N1_hai_late, FCH3N2_hai_late, FCFluB_hai_late)
## # A tibble: 6 x 14
##   Cohort variable     n   min   max median    q1    q3   iqr   mad  mean    sd
##   <fct>  <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Healt~ FCFluB_~    15  1        4      2   2       4   2    0     2.73  1.1 
## 2 Healt~ FCH1N1_~    27  0.5      8      2   1       2   1    1.48  1.98  1.54
## 3 Healt~ FCH3N2_~    15  1        8      2   1       3   2    1.48  2.47  1.88
## 4 anti-~ FCFluB_~    22  1       32      4   2.5     4   1.5  1.48  6.5   7.31
## 5 anti-~ FCH1N1_~    24  0.25    64      4   2      16  14    5.00 11.4  15.0 
## 6 anti-~ FCH3N2_~    22  1       64      4   4       8   4    2.96  9.14 13.9 
## # ... with 2 more variables: se <dbl>, ci <dbl>
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
  a<-twoSampleBar(data=subsetData, xData="Cohort", yData="FCH1N1_hai_late", fillParam="Cohort", title="H1N1", yLabel="Fold-change in HAI", nonparam = T) + 
  scale_y_continuous(trans='log2',breaks=c(2^(0:14)), limits = c(1,175))
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
  b<-twoSampleBar(data=subsetData, xData="Cohort", yData="FCH3N2_hai_late", fillParam="Cohort", title="H3N2", yLabel="Fold-change in HAI", nonparam = T) + 
  scale_y_continuous(trans='log2',breaks=c(2^(0:14)), limits = c(1,175))
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
  c<-twoSampleBar(data=subsetData, xData="Cohort", yData="FCFluB_hai_late", fillParam="Cohort", title="Influenza B", yLabel="Fold-change in HAI", nonparam = T) + 
  scale_y_continuous(trans='log2',breaks=c(2^(0:14)), limits = c(1,175))
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
grid.arrange(a,b,c,nrow=1)
## Warning: Removed 8 rows containing missing values (position_quasirandom).
## Warning: Removed 20 rows containing missing values (position_quasirandom).

## Warning: Removed 20 rows containing missing values (position_quasirandom).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/Seroconversion_3strains.pdf", arrangeGrob(a,b,c,nrow=1), width=11)
# write.csv(subsetData[,c("dbCode","Cohort","FCH1N1_hai_late", "FCH3N2_hai_late", "FCFluB_hai_late")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2a.csv")


plotSeroProtection <- function(STRAIN, PLOTTITLE)
{
  subsetData <- subset(mergedData, Cohort != "nonPD1" )
  melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = STRAIN); melted <- melted[ which( !is.na(melted$value) ),]
  overTime <- aggregate( melted$value, by= list(melted$variable, melted$TimeCategory, melted$Cohort), FUN=mean, na.rm = T); overTime
  aPD1_seroprot <- length(which(melted$value > 40 & melted$Cohort == "anti-PD-1" & melted$TimeCategory == "late") )
  aPD1_total <- length(which(melted$Cohort == "anti-PD-1" & melted$TimeCategory == "late") )
  HC_seroprot <- length(which(melted$value > 40 & melted$Cohort == "Healthy" & melted$TimeCategory == "late") )
  HC_total <- length(which(melted$Cohort == "Healthy" & melted$TimeCategory == "late") )
  continTable <- matrix(c(aPD1_seroprot, aPD1_total - aPD1_seroprot, HC_seroprot, HC_total - HC_seroprot), ncol=2); continTable
  print( fisher.test(continTable))
  
  seroprot <- data.frame( Cohort = c("Healthy", "anti-PD-1"), Seroprot = c(HC_seroprot / HC_total, aPD1_seroprot / aPD1_total))
  seroprot$Cohort <- factor(seroprot$Cohort, levels = c("Healthy", "anti-PD-1"))
  return( ggplot(data=seroprot, aes(x=Cohort, y=Seroprot, fill=Cohort,width=0.6)) + scale_fill_manual(values = c("#7FAEDB", "#FFB18C")) +  
    geom_bar( position = position_dodge(), stat = "identity", color="black",size=0.1) + ggtitle(PLOTTITLE) + ylab("Proportion seroprotected") +  theme_bw() +
    theme(axis.text = element_text(size=28,hjust = 0.5,color="black"), axis.title = element_text(size=28,hjust = 0.5), axis.title.x = element_blank(), plot.title = element_text(size=32,hjust = 0.5)) + 
    theme(legend.position = "none", axis.text.x = element_text(angle=45,hjust=1,vjust=1)) + theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank()) +
    coord_cartesian(ylim = c(0,1)) + scale_y_continuous(breaks = seq(0,1,0.1) )    ) 
}

  a <- plotSeroProtection(STRAIN = "H1N1pdm09.HAI.titer", PLOTTITLE="H1N1"); a
## 
##  Fisher's Exact Test for Count Data
## 
## data:  continTable
## p-value = 0.3539
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##    0.3507695 205.1583228
## sample estimates:
## odds ratio 
##   3.904467

  b <- plotSeroProtection(STRAIN = "H3N2.HAI.titer", PLOTTITLE="H3N2"); b
## 
##  Fisher's Exact Test for Count Data
## 
## data:  continTable
## p-value = 0.4054
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.0376066       Inf
## sample estimates:
## odds ratio 
##        Inf

  c <- plotSeroProtection(STRAIN = "FluB.HAI.titer", PLOTTITLE="Influenza B"); c
## 
##  Fisher's Exact Test for Count Data
## 
## data:  continTable
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   0.01131676 15.13008261
## sample estimates:
## odds ratio 
##  0.7205995

grid.arrange(a,b,c,nrow=1)

  # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/Seroprotection_3strains.pdf", arrangeGrob(a,b,c,nrow=1), width=11)
  # write.csv(a$data, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2b-1.csv")
  # write.csv(b$data, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2b-2.csv")
  # write.csv(c$data, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2b-3.csv")

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = "H1N1pdm09.HAI.titer"); melted <- melted[ which( !is.na(melted$value) ),]
  a<-prePostTimeAveraged(melted, title = "H1N1", xLabel = NULL, yLabel = "H1N1 HAI titer") + scale_y_continuous(trans = 'log2', breaks=c(2^(1:14)), limits = c(2,2500)) +
  geom_hline(yintercept = 40, linetype = "dashed", color="grey50") + annotate("text",x = 1.5, y=30, label="Seroprotection", size=7,color="grey50")
Anova(fit <- lm( value ~ Cohort * TimeCategory, data = melted)); tukey_hsd(fit)
## Anova Table (Type II tests)
## 
## Response: value
##                       Sum Sq  Df F value   Pr(>F)   
## Cohort                240742   1  2.4645 0.118563   
## TimeCategory         1085493   2  5.5562 0.004707 **
## Cohort:TimeCategory   213572   2  1.0932 0.337816   
## Residuals           14554664 149                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high   p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
##  1 Coho~ Healt~ anti-~          0    -87.4   -187.       11.9 0.084  
##  2 Time~ basel~ oneWe~          0    101.     -44.6     247.  0.231  
##  3 Time~ basel~ late            0    200.      57.8     343.  0.00315
##  4 Time~ oneWe~ late            0     99.2    -50.4     249.  0.262  
##  5 Coho~ Healt~ anti-~          0   -160.    -400.       79.2 0.387  
##  6 Coho~ Healt~ Healt~          0     62.5   -183.      308.  0.977  
##  7 Coho~ Healt~ anti-~          0    -23.9   -290.      242.  1      
##  8 Coho~ Healt~ Healt~          0    113.    -133.      358.  0.772  
##  9 Coho~ Healt~ anti-~          0    131.    -123.      384.  0.672  
## 10 Coho~ anti-~ Healt~          0    223.     -16.7     462.  0.0842 
## 11 Coho~ anti-~ anti-~          0    136.    -124.      397.  0.658  
## 12 Coho~ anti-~ Healt~          0    273.      33.4     512.  0.0156 
## 13 Coho~ anti-~ anti-~          0    291.      43.5     538.  0.0111 
## 14 Coho~ Healt~ anti-~          0    -86.4   -353.      180.  0.936  
## 15 Coho~ Healt~ Healt~          0     50.1   -196.      296.  0.992  
## 16 Coho~ Healt~ anti-~          0     68.    -185.      321.  0.971  
## 17 Coho~ anti-~ Healt~          0    136.    -130.      403.  0.677  
## 18 Coho~ anti-~ anti-~          0    154.    -119.      428.  0.579  
## 19 Coho~ Healt~ anti-~          0     17.9   -235.      271.  1      
## # ... with 1 more variable: p.adj.signif <chr>
write.csv(melted, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2c-1.csv")
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = "H3N2.HAI.titer"); melted <- melted[ which( !is.na(melted$value) ),]
  b<-prePostTimeAveraged(melted, title = "H3N2", xLabel = NULL, yLabel = "H3N2 HAI titer") + scale_y_continuous(trans = 'log2', breaks=c(2^(1:14)), limits = c(2,2500)) +
  geom_hline(yintercept = 40, linetype = "dashed", color="grey50") + annotate("text",x = 1.5, y=30, label="Seroprotection", size=7,color="grey50")
Anova(fit <- lm( value ~ Cohort * TimeCategory, data = melted)); tukey_hsd(fit)
## Anova Table (Type II tests)
## 
## Response: value
##                      Sum Sq  Df F value    Pr(>F)    
## Cohort              1709874   1 20.1589 1.752e-05 ***
## TimeCategory        1143350   2  6.7399  0.001728 ** 
## Cohort:TimeCategory    6480   2  0.0382  0.962532    
## Residuals           9415018 111                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high   p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
##  1 Coho~ Healt~ anti-~          0  -261.     -371.    -151.   7.04e-6
##  2 Time~ basel~ oneWe~          0    89.3     -66.7    245.   3.66e-1
##  3 Time~ basel~ late            0   236.       82.6    390.   1.15e-3
##  4 Time~ oneWe~ late            0   147.      -16.2    310.   8.67e-2
##  5 Coho~ Healt~ anti-~          0  -257.     -524.       9.76 6.59e-2
##  6 Coho~ Healt~ Healt~          0    92.8    -216.     401.   9.52e-1
##  7 Coho~ Healt~ anti-~          0  -170.     -459.     118.   5.28e-1
##  8 Coho~ Healt~ Healt~          0   219.      -89.7    527.   3.18e-1
##  9 Coho~ Healt~ anti-~          0    -8.82   -292.     274.   1.00e+0
## 10 Coho~ anti-~ Healt~          0   350.       83.0    617.   3.15e-3
## 11 Coho~ anti-~ anti-~          0    87.2    -157.     331.   9.04e-1
## 12 Coho~ anti-~ Healt~          0   476.      209.     743.   1.54e-5
## 13 Coho~ anti-~ anti-~          0   249.       11.4    486.   3.42e-2
## 14 Coho~ Healt~ anti-~          0  -263.     -551.      25.6  9.56e-2
## 15 Coho~ Healt~ Healt~          0   126.     -183.     434.   8.44e-1
## 16 Coho~ Healt~ anti-~          0  -102.     -384.     181.   9.03e-1
## 17 Coho~ anti-~ Healt~          0   389.      100.     677.   2.17e-3
## 18 Coho~ anti-~ anti-~          0   161.      -99.6    422.   4.75e-1
## 19 Coho~ Healt~ anti-~          0  -227.     -510.      55.3  1.90e-1
## # ... with 1 more variable: p.adj.signif <chr>
# write.csv(melted, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2c-2.csv")
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = "FluB.HAI.titer"); melted <- melted[ which( !is.na(melted$value) ),]
  c<-prePostTimeAveraged(melted, title = "Influenza B", xLabel = NULL, yLabel = "Influenza B HAI titer") + scale_y_continuous(trans = 'log2', breaks=c(2^(1:14)), limits = c(2,2500)) +
  geom_hline(yintercept = 40, linetype = "dashed", color="grey50") + annotate("text",x = 1.5, y=30, label="Seroprotection", size=7,color="grey50")
Anova(fit <- lm( value ~ Cohort * TimeCategory, data = melted)); tukey_hsd(fit)
## Anova Table (Type II tests)
## 
## Response: value
##                      Sum Sq  Df F value    Pr(>F)    
## Cohort              1277307   1 32.8886 8.543e-08 ***
## TimeCategory         811655   2 10.4494 6.951e-05 ***
## Cohort:TimeCategory  131464   2  1.6925    0.1888    
## Residuals           4310952 111                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high   p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
##  1 Coho~ Healt~ anti-~          0  -225.     -300.    -151.   2.29e-8
##  2 Time~ basel~ oneWe~          0    77.9     -27.6    183.   1.90e-1
##  3 Time~ basel~ late            0   199.       95.3    303.   4.02e-5
##  4 Time~ oneWe~ late            0   121.       10.9    232.   2.77e-2
##  5 Coho~ Healt~ anti-~          0  -144.     -324.      37.0  2.00e-1
##  6 Coho~ Healt~ Healt~          0   119.      -89.8    328.   5.66e-1
##  7 Coho~ Healt~ anti-~          0   -82.9    -278.     112.   8.20e-1
##  8 Coho~ Healt~ Healt~          0   303.       94.8    512.   7.07e-4
##  9 Coho~ Healt~ anti-~          0    -5.77   -197.     186.   1.00e+0
## 10 Coho~ anti-~ Healt~          0   263.       81.9    443.   7.13e-4
## 11 Coho~ anti-~ anti-~          0    60.8    -104.     226.   8.93e-1
## 12 Coho~ anti-~ Healt~          0   447.      266.     628.   1.28e-9
## 13 Coho~ anti-~ anti-~          0   138.      -22.5    298.   1.35e-1
## 14 Coho~ Healt~ anti-~          0  -202.     -397.      -6.65 3.83e-2
## 15 Coho~ Healt~ Healt~          0   185.      -24.2    393.   1.15e-1
## 16 Coho~ Healt~ anti-~          0  -125.     -316.      66.7  4.14e-1
## 17 Coho~ anti-~ Healt~          0   386.      191.     582.   1.23e-6
## 18 Coho~ anti-~ anti-~          0    77.2     -99.4    254.   8.02e-1
## 19 Coho~ Healt~ anti-~          0  -309.     -501.    -118.   1.14e-4
## # ... with 1 more variable: p.adj.signif <chr>
# write.csv(melted, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2c-3.csv")
grid.arrange(a,b,c,nrow=1)

  # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAIresponsesTimeAveraged_3strains.pdf", arrangeGrob(a,b,c,nrow=1), width=14)



subsetData <- subset(mergedData, Cohort != "nonPD1" )
  a<-prePostTime(data = subsetData, xData = "TimeCategory", yData = "H1N1pdm09.HAI.titer", fillParam = "Cohort", title = "H1N1", xLabel = "TimeCategory",
            yLabel = "H1N1pdm09 HAI titer", groupby = "dbCode") + scale_y_continuous(trans='log2', breaks=2^(1:13))
  b<-prePostTime(data = subsetData, xData = "TimeCategory", yData = "H3N2.HAI.titer", fillParam = "Cohort", title = "H3N2", xLabel = "TimeCategory",
               yLabel = "H3N2 HAI titer", groupby = "dbCode") + scale_y_continuous(trans='log2', breaks=2^(1:13))
  c<-prePostTime(data = subsetData, xData = "TimeCategory", yData = "FluB.HAI.titer", fillParam = "Cohort", title = "FluB", xLabel = "TimeCategory",
               yLabel = "FluB HAI titer", groupby = "dbCode") + scale_y_continuous(trans='log2', breaks=2^(1:13))
grid.arrange(a,b,c,nrow=1)
## Warning: Removed 1 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 39 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 39 row(s) containing missing values (geom_path).
## Warning: Removed 39 rows containing missing values (geom_point).
## Warning: Removed 39 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 39 row(s) containing missing values (geom_path).
## Warning: Removed 39 rows containing missing values (geom_point).

  # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAItiter_overTime_3strains.pdf", arrangeGrob(a,b,c,nrow=1), width=14)
# write.csv(a$data, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e3a-1.csv")
# write.csv(b$data, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e3a-2.csv")
# write.csv(c$data, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e3a-3.csv")

———— Correlations with HAI titer

subsetData <- subset(mergedData, Cohort == 'Healthy' & TimeCategory == "oneWeek")
subsetData <- subsetData[,grep( paste(c("FCtfh_oW","FCPB_oW", "cTfh_ICOShiCD38hi_..FreqParent", "CD27hiCD38hi_..FreqParent", "FCH1N1_hai_late"), collapse = "|"), names(subsetData))]  
cor.matrix <- cor(subsetData, method="kendall" , use="pairwise.complete.obs" )
cor.matrix.pmat <- ggcorrplot::cor_pmat(subsetData, method="kendall", use="pairwise.complete.obs"  )
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix <- as.data.frame(cor.matrix); cor.matrix$Labels <- row.names(cor.matrix); cor.matrix$Cohort <- "Healthy"
cor.matrix <- merge( x = cor.matrix, y = cor.matrix.pmat[,"FCH1N1_hai_late"], by = "row.names"); names(cor.matrix)[grep("y",names(cor.matrix))] <- "Pvalue"

subsetData <- subset(mergedData, Cohort == 'anti-PD-1'& TimeCategory == "oneWeek")
subsetData <- subsetData[,grep( paste(c("FCtfh_oW","FCPB_oW", "cTfh_ICOShiCD38hi_..FreqParent", "CD27hiCD38hi_..FreqParent","FCH1N1_hai_late"), collapse = "|"), names(subsetData))]  
cor.matrix2 <- cor(subsetData, method="kendall" , use="pairwise.complete.obs" )
cor.matrix2.pmat <- ggcorrplot::cor_pmat(subsetData, method="kendall", use="pairwise.complete.obs"  )
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix2 <- as.data.frame(cor.matrix2); cor.matrix2$Labels <- row.names(cor.matrix2); cor.matrix2$Cohort <- "anti-PD-1"
cor.matrix2 <- merge( x = cor.matrix2, y = cor.matrix2.pmat[,"FCH1N1_hai_late"], by = "row.names"); names(cor.matrix2)[grep("^y",names(cor.matrix2))] <- "Pvalue"

temp <- as.data.frame(rbind(cor.matrix, cor.matrix2)); temp <- temp[ -grep( "FCH1N1", temp$Row.names),]

temp$Labels <- str_replace(temp$Labels, pattern = "FCtfh_oW", replacement = "Fold-change ICOS+CD38+ cTfh\nat one week" )
temp$Labels <- str_replace(temp$Labels, pattern = "FCPB_oW", replacement = "Fold-change plasmablasts\nat one week" )
temp$Labels <- str_replace(temp$Labels, pattern = "cTfh_ICOShiCD38hi_..FreqParent", replacement = "ICOS+CD38+ cTfh (%CXCR5+CD4+)\nat one week" )
temp$Labels <- str_replace(temp$Labels, pattern = "CD27hiCD38hi_..FreqParent", replacement = "Plasmablasts (% CD19) \nat one week" )
temp$Labels <- factor(temp$Labels, levels = c( "Plasmablasts (% CD19) \nat one week","Fold-change plasmablasts\nat one week", 
                                               "ICOS+CD38+ cTfh (%CXCR5+CD4+)\nat one week","Fold-change ICOS+CD38+ cTfh\nat one week"))
ggplot( data = temp, aes(x = Labels,y = FCH1N1_hai_late, fill = Cohort)) + geom_bar(stat='identity',position = position_dodge(width=0.5), width=0.05, color="black", size=0.1) + 
  geom_point(aes(fill=Cohort, size=Pvalue), pch=21, color="black", stroke=0.2, position = position_dodge(width=0.5)) + 
  theme_bw() + scale_size(range = c(18,1), breaks = c(0,0.05,0.1,0.3,0.7), limits = c(0,0.8), trans = 'sqrt') + guides(size = guide_legend(reverse=TRUE)) + 
  scale_fill_manual(values=c("#FFB18C", "#7FAEDB")) + ylab("Kendall's tau vs\nFold-change in H1N1 HAI") + xlab(" ") + ggtitle("H1N1") + geom_hline(yintercept=0, linetype = "dashed") +
  theme(axis.text.y = element_text(size = 28, color="black"), plot.title = element_text(size=28), axis.text.x = element_text(size=22, color="black", angle=45, hjust=1,vjust=1), 
        axis.title.x = element_text(size=28, color="black"), legend.text = element_text(size=22), legend.title = element_text(size=22)) + theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank()) + 
  coord_flip() + scale_y_continuous(limits = c(-1,1), breaks = seq(-1,1,0.25)) 

 # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/Fold-changes_HAI_correlations.pdf", device = "pdf", width=13, height=9)
# write.csv(temp, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e3e-1.csv")

subsetData <- subset(mergedData, Cohort == 'Healthy' & TimeCategory == "oneWeek")
subsetData <- subsetData[,grep( paste(c("FCtfh_oW","FCPB_oW", "cTfh_ICOShiCD38hi_..FreqParent", "CD27hiCD38hi_..FreqParent", "FCH3N2_hai_late"), collapse = "|"), names(subsetData))]  
cor.matrix <- cor(subsetData, method="kendall" , use="pairwise.complete.obs" )
cor.matrix.pmat <- ggcorrplot::cor_pmat(subsetData, method="kendall", use="pairwise.complete.obs"  )
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix <- as.data.frame(cor.matrix); cor.matrix$Labels <- row.names(cor.matrix); cor.matrix$Cohort <- "Healthy"
cor.matrix <- merge( x = cor.matrix, y = cor.matrix.pmat[,"FCH3N2_hai_late"], by = "row.names"); names(cor.matrix)[grep("y",names(cor.matrix))] <- "Pvalue"

subsetData <- subset(mergedData, Cohort == 'anti-PD-1'& TimeCategory == "oneWeek")
subsetData <- subsetData[,grep( paste(c("FCtfh_oW","FCPB_oW", "cTfh_ICOShiCD38hi_..FreqParent", "CD27hiCD38hi_..FreqParent","FCH3N2_hai_late"), collapse = "|"), names(subsetData))]  
cor.matrix2 <- cor(subsetData, method="kendall" , use="pairwise.complete.obs" )
cor.matrix2.pmat <- ggcorrplot::cor_pmat(subsetData, method="kendall", use="pairwise.complete.obs"  )
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix2 <- as.data.frame(cor.matrix2); cor.matrix2$Labels <- row.names(cor.matrix2); cor.matrix2$Cohort <- "anti-PD-1"
cor.matrix2 <- merge( x = cor.matrix2, y = cor.matrix2.pmat[,"FCH3N2_hai_late"], by = "row.names"); names(cor.matrix2)[grep("^y",names(cor.matrix2))] <- "Pvalue"

temp <- as.data.frame(rbind(cor.matrix, cor.matrix2)); temp <- temp[ -grep( "FCH3N2", temp$Row.names),]

temp$Labels <- str_replace(temp$Labels, pattern = "FCtfh_oW", replacement = "Fold-change ICOS+CD38+ cTfh\nat one week" )
temp$Labels <- str_replace(temp$Labels, pattern = "FCPB_oW", replacement = "Fold-change plasmablasts\nat one week" )
temp$Labels <- str_replace(temp$Labels, pattern = "cTfh_ICOShiCD38hi_..FreqParent", replacement = "ICOS+CD38+ cTfh (%CXCR5+CD4+)\nat one week" )
temp$Labels <- str_replace(temp$Labels, pattern = "CD27hiCD38hi_..FreqParent", replacement = "Plasmablasts (% CD19) \nat one week" )
temp$Labels <- factor(temp$Labels, levels = c( "Plasmablasts (% CD19) \nat one week","Fold-change plasmablasts\nat one week", 
                                              "ICOS+CD38+ cTfh (%CXCR5+CD4+)\nat one week","Fold-change ICOS+CD38+ cTfh\nat one week"))
ggplot( data = temp, aes(x = Labels,y = FCH3N2_hai_late, fill = Cohort)) + geom_bar(stat='identity',position = position_dodge(width=0.5), width=0.05, color="black", size=0.1) + 
 geom_point(aes(fill=Cohort, size=Pvalue), pch=21, color="black", stroke=0.2, position = position_dodge(width=0.5)) + 
 theme_bw() + scale_size(range = c(18,1), breaks = c(0,0.05,0.1,0.3,0.7), limits = c(0,0.8), trans = 'sqrt') + guides(size = guide_legend(reverse=TRUE)) + 
 scale_fill_manual(values=c("#FFB18C", "#7FAEDB")) + ylab("Kendall's tau vs\nFold-change in H3N2 HAI") + xlab(" ") + ggtitle("H3N2") + geom_hline(yintercept=0, linetype = "dashed") +
 theme(axis.text.y = element_text(size = 28, color="black"), plot.title = element_text(size=28), axis.text.x = element_text(size=22, color="black", angle=45, hjust=1,vjust=1), 
       axis.title.x = element_text(size=28, color="black"), legend.text = element_text(size=22), legend.title = element_text(size=22))+ theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank()) + 
 coord_flip() + scale_y_continuous(limits = c(-1,1), breaks = seq(-1,1,0.25))
## Warning: Removed 2 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/Fold-changes_H3N2_correlations.pdf", device = "pdf", width=13, height=9)
# write.csv(temp, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e3e-2.csv")

 
subsetData <- subset(mergedData, Cohort == 'Healthy' & TimeCategory == "oneWeek")
subsetData <- subsetData[,grep( paste(c("FCtfh_oW","FCPB_oW", "cTfh_ICOShiCD38hi_..FreqParent", "CD27hiCD38hi_..FreqParent", "FCFluB_hai_late"), collapse = "|"), names(subsetData))]  
cor.matrix <- cor(subsetData, method="kendall" , use="pairwise.complete.obs" )
cor.matrix.pmat <- ggcorrplot::cor_pmat(subsetData, method="kendall", use="pairwise.complete.obs"  )
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix <- as.data.frame(cor.matrix); cor.matrix$Labels <- row.names(cor.matrix); cor.matrix$Cohort <- "Healthy"
cor.matrix <- merge( x = cor.matrix, y = cor.matrix.pmat[,"FCFluB_hai_late"], by = "row.names"); names(cor.matrix)[grep("y",names(cor.matrix))] <- "Pvalue"

subsetData <- subset(mergedData, Cohort == 'anti-PD-1'& TimeCategory == "oneWeek")
subsetData <- subsetData[,grep( paste(c("FCtfh_oW","FCPB_oW", "cTfh_ICOShiCD38hi_..FreqParent", "CD27hiCD38hi_..FreqParent","FCFluB_hai_late"), collapse = "|"), names(subsetData))]  
cor.matrix2 <- cor(subsetData, method="kendall" , use="pairwise.complete.obs" )
cor.matrix2.pmat <- ggcorrplot::cor_pmat(subsetData, method="kendall", use="pairwise.complete.obs"  )
## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties

## Warning in cor.test.default(mat[, i], mat[, j], ...): Cannot compute exact p-
## value with ties
cor.matrix2 <- as.data.frame(cor.matrix2); cor.matrix2$Labels <- row.names(cor.matrix2); cor.matrix2$Cohort <- "anti-PD-1"
cor.matrix2 <- merge( x = cor.matrix2, y = cor.matrix2.pmat[,"FCFluB_hai_late"], by = "row.names"); names(cor.matrix2)[grep("^y",names(cor.matrix2))] <- "Pvalue"

temp <- as.data.frame(rbind(cor.matrix, cor.matrix2)); temp <- temp[ -grep( "FCFluB", temp$Row.names),]

temp$Labels <- str_replace(temp$Labels, pattern = "FCtfh_oW", replacement = "Fold-change ICOS+CD38+ cTfh\nat one week" )
temp$Labels <- str_replace(temp$Labels, pattern = "FCPB_oW", replacement = "Fold-change plasmablasts\nat one week" )
temp$Labels <- str_replace(temp$Labels, pattern = "cTfh_ICOShiCD38hi_..FreqParent", replacement = "ICOS+CD38+ cTfh (%CXCR5+CD4+)\nat one week" )
temp$Labels <- str_replace(temp$Labels, pattern = "CD27hiCD38hi_..FreqParent", replacement = "Plasmablasts (% CD19) \nat one week" )
temp$Labels <- factor(temp$Labels, levels = c( "Plasmablasts (% CD19) \nat one week","Fold-change plasmablasts\nat one week", 
                                               "ICOS+CD38+ cTfh (%CXCR5+CD4+)\nat one week","Fold-change ICOS+CD38+ cTfh\nat one week"))
ggplot( data = temp, aes(x = Labels,y = FCFluB_hai_late, fill = Cohort)) + geom_bar(stat='identity',position = position_dodge(width=0.5), width=0.05, color="black", size=0.1) + 
  geom_point(aes(fill=Cohort, size=Pvalue), pch=21, color="black", stroke=0.2, position = position_dodge(width=0.5)) + 
  theme_bw() + scale_size(range = c(18,1), breaks = c(0,0.05,0.1,0.3,0.7), limits = c(0,0.8), trans = 'sqrt') + guides(size = guide_legend(reverse=TRUE)) + 
  scale_fill_manual(values=c("#FFB18C", "#7FAEDB")) + ylab("Kendall's tau vs\nFold-change in FluB HAI") + xlab(" ") + ggtitle("Influenza B") + geom_hline(yintercept=0, linetype = "dashed") +
  theme(axis.text.y = element_text(size = 28, color="black"), plot.title = element_text(size=28), axis.text.x = element_text(size=22, color="black", angle=45, hjust=1,vjust=1), 
        axis.title.x = element_text(size=28, color="black"), legend.text = element_text(size=22), legend.title = element_text(size=22))+ theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank()) + 
  coord_flip() + scale_y_continuous(limits = c(-1,1), breaks = seq(-1,1,0.25))
## Warning: Removed 2 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/Fold-changes_FluB_correlations.pdf", device = "pdf", width=13, height=9)
# write.csv(temp, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e3e-3.csv")

———– glycosylation ——————–

# ***************************    Total afucosylation  *******************************


# nonspecific afucosylation
subsetData <- subset(mergedData, Cohort == "anti-PD-1" )
univScatter(data = subsetData, xData = "IgG1_Total.afucosylated", yData="Nonsp_afucosylated",fillParam = 'dummy', title = "Nonsp vs spec afucosylation", 
            xLabel = "anti-H1 afucosylation", yLabel = "Nonspecific afucosylation") + scale_x_continuous(limits=c(0,16))+ scale_fill_manual(values=c("#FFB18C"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 62 rows containing non-finite values (stat_smooth).
## Warning: Removed 62 rows containing missing values (geom_point).

# H1 specific afucosylation
subsetData <- subset(mergedData, Cohort != "nonPD1" )

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.afucosylated", fillParam = "Cohort", title = "IgG1 afucosylation", xLabel = "TimeCategory",
            yLabel = "anti-H1 Afucosylation (% abund)", groupby = "dbCode")  + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1afucosylation_overTime.pdf", width=6)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG1_Total.afucosylated")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4b.csv")

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.afucosylated", fillParam = "Cohort", title = "IgG2 afucosylation over time", xLabel = "TimeCategory",
            yLabel = "Afucosylation (% abundance, anti-H1)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30))
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.afucosylated", fillParam = "Cohort", title = "IgG3/4 afucosylation over time", xLabel = "TimeCategory",
            yLabel = "Afucosylation (% abundance, anti-H1)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30))
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ***************************    Total Bisection   *******************************
prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.Bisection", fillParam = "Cohort", title = "IgG1 bisection", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG1", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.Bisection", fillParam = "Cohort", title = "IgG2 bisection over time", xLabel = "TimeCategory",
            yLabel = "Bisection (% abundance, anti-H1)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.Bisection", fillParam = "Cohort", title = "IgG3/4 bisection over time", xLabel = "TimeCategory",
            yLabel = "Bisection (% abundance, anti-H1)", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"), xData="Cohort", yData="IgG1_Total.Bisection", fillParam="Cohort", 
             title="Baseline bisection", yLabel="% anti-H1 IgG1")+ scale_y_continuous(limits = c(0,20), breaks = seq(0,50,5))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek"), xData="Cohort", yData="IgG1_Total.Bisection", fillParam="Cohort", 
             title="One Week bisection", yLabel="% anti-H1 IgG1")+ scale_y_continuous(limits = c(0,20), breaks = seq(0,50,5))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

# ***************************    Galactosylation *******************************

# nonspecific afucosylation
subsetData <- subset(mergedData, Cohort == "anti-PD-1" )
univScatter(data = subsetData, xData = "IgG1_Total.Galactosylation..G1.G2.", yData="Nonsp_GalactosylationG1.G2",fillParam = 'dummy', title = "Nonsp vs spec galactosylation", 
            xLabel = "anti-H1 galactosylation", yLabel = "Nonspecific galactosylation") + scale_x_continuous(limits=c(90,100))+ scale_fill_manual(values=c("#FFB18C"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 62 rows containing non-finite values (stat_smooth).

## Warning: Removed 62 rows containing missing values (geom_point).

# specific galactosylation
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort','Year'), measure.vars = c("IgG1_Total.Galactosylation..G1.G2."))
prePostTimeAveraged(melted, title = "Total galactosylation", xLabel = NULL, yLabel = "% anti-H1 IgG1") + 
  scale_y_continuous(breaks=seq(0,100,1), limits = c(90,100))

  # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_totGalactosylation_overTime.pdf", width=4.5, height=7)
# melted %>%  anova_test(value ~ TimeCategory+Cohort )                        # two-way anova without interaction term 
Anova(fit <- lm( value ~ Cohort * TimeCategory, data = melted)); tukey_hsd(fit)
## Anova Table (Type II tests)
## 
## Response: value
##                      Sum Sq  Df  F value Pr(>F)    
## Cohort              204.313   1 138.1247 <2e-16 ***
## TimeCategory          5.779   2   1.9533 0.1454    
## Cohort:TimeCategory   3.070   2   1.0377 0.3568    
## Residuals           221.879 150                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high    p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>    <dbl>
##  1 Coho~ Healt~ anti-~          0 -2.32e+0   -2.70     -1.93  0.      
##  2 Time~ basel~ oneWe~          0  3.12e-1   -0.252     0.876 3.91e- 1
##  3 Time~ basel~ late            0  4.49e-1   -0.106     1.00  1.38e- 1
##  4 Time~ oneWe~ late            0  1.36e-1   -0.443     0.715 8.43e- 1
##  5 Coho~ Healt~ anti-~          0 -2.64e+0   -3.57     -1.70  1.82e-12
##  6 Coho~ Healt~ Healt~          0  1.14e-1   -0.841     1.07  9.99e- 1
##  7 Coho~ Healt~ anti-~          0 -2.13e+0   -3.15     -1.11  1.85e- 7
##  8 Coho~ Healt~ Healt~          0  1.15e-1   -0.841     1.07  9.99e- 1
##  9 Coho~ Healt~ anti-~          0 -1.85e+0   -2.83     -0.864 3.47e- 6
## 10 Coho~ anti-~ Healt~          0  2.75e+0    1.82      3.68  2.70e-13
## 11 Coho~ anti-~ anti-~          0  5.03e-1   -0.496     1.50  6.94e- 1
## 12 Coho~ anti-~ Healt~          0  2.75e+0    1.82      3.68  2.68e-13
## 13 Coho~ anti-~ anti-~          0  7.87e-1   -0.175     1.75  1.76e- 1
## 14 Coho~ Healt~ anti-~          0 -2.25e+0   -3.27     -1.23  3.61e- 8
## 15 Coho~ Healt~ Healt~          0  5.89e-4   -0.955     0.956 1.00e+ 0
## 16 Coho~ Healt~ anti-~          0 -1.96e+0   -2.95     -0.978 7.05e- 7
## 17 Coho~ anti-~ Healt~          0  2.25e+0    1.23      3.27  3.58e- 8
## 18 Coho~ anti-~ anti-~          0  2.84e-1   -0.765     1.33  9.70e- 1
## 19 Coho~ Healt~ anti-~          0 -1.96e+0   -2.95     -0.979 6.99e- 7
## # ... with 1 more variable: p.adj.signif <chr>
# write.csv(melted, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2d.csv")

subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.Galactosylation..G1.G2.", fillParam = "Cohort", title = "IgG1 total galactosylation", 
            xLabel = "TimeCategory", yLabel = "% anti-H1 IgG1", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) ) + 
  coord_cartesian(ylim = c(90,100))
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_Tot-galactosylation_overTime_perSubject.pdf", width=6.5)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG1_Total.afucosylated")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4f-1.csv")

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.Galactosylation..G1.G2.", fillParam = "Cohort", title = "IgG2 total galactosylation", 
            xLabel = "TimeCategory", yLabel = "% anti-H1 IgG2", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) )+ 
  coord_cartesian(ylim = c(80,100))
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG2_Tot-galactosylation_overTime_perSubject.pdf", width=6.5)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG1_Total.afucosylated")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4f-2.csv")

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.Galactosylation..G1.G2.", fillParam = "Cohort", title = "IgG3/4 total galactosylation", 
            xLabel = "TimeCategory", yLabel = "% anti-H1 IgG3/4", groupby = "dbCode") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) )+ 
  coord_cartesian(ylim = c(50,100))
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG34_Tot-galactosylation_overTime_perSubject.pdf", width=6.5)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG1_Total.afucosylated")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4f-3.csv")

subsetData <- subset(mergedData, Cohort != "nonPD1"  & TimeCategory == "baseline")
twoSampleBar(data=subsetData, xData="Cohort", yData="IgG1_Total.Galactosylation..G1.G2.", fillParam="Cohort", title="Total galactosylation\nbaseline", 
             yLabel="% anti-H1 IgG1") + 
  scale_y_continuous(breaks = seq(0,100,2)) +   coord_cartesian(ylim = c(88,102)) 
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

  # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_totGalactosylation_bL.pdf", width=5, height=8)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG1_Total.Galactosylation..G1.G2.")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2e.csv")

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.G0", fillParam = "Cohort", title = "IgG1 G0 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG1", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.G0", fillParam = "Cohort", title = "IgG2 G0 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG2", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.G0", fillParam = "Cohort", title = "IgG3/4 G0 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG3/4", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,30) )
## Warning: Removed 1 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 1 row(s) containing missing values (geom_path).
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## Warning: Removed 1 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1"  & TimeCategory == "baseline")
twoSampleBar(data=subsetData, xData="Cohort", yData="IgG1_Total.G0", fillParam="Cohort", 
             title="G0 - baseline", yLabel="% anti-H1 IgG1", nonparam=T) + scale_y_continuous(breaks = seq(0,100,2)) + 
  coord_cartesian(ylim = c(0,10))
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_G0_Galactosylation_bL.pdf", width=4.5)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG1_Total.G0")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4e-1.csv")



prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.G1", fillParam = "Cohort", title = "IgG1 G1 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G1 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.G1", fillParam = "Cohort", title = "IgG2 G1 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G1 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.G1", fillParam = "Cohort", title = "IgG3/4 G1 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G1 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,100) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

subsetData <- subset(mergedData, Cohort != "nonPD1"  & TimeCategory == "baseline")
twoSampleBar(data=subsetData, xData="Cohort", yData="IgG1_Total.G1", fillParam="Cohort", 
             title="G1 - baseline", yLabel="% anti-H1 IgG1", nonparam=T) + scale_y_continuous(limits = c(0,70), breaks = seq(0,100,10)) 
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_G1_Galactosylation_bL.pdf", width=4.5)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG1_Total.G1")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4e-2.csv")


prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.G2", fillParam = "Cohort", title = "IgG1 G2 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G2 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,80) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.G2", fillParam = "Cohort", title = "IgG2 G2 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G2 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,80) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.G2", fillParam = "Cohort", title = "IgG3/4 G2 galactosylation over time", xLabel = "TimeCategory",
            yLabel = "G2 Galactosylation (% abundance, anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,5), limits = c(0,80) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

subsetData <- subset(mergedData, Cohort != "nonPD1"  & TimeCategory == "baseline")
twoSampleBar(data=subsetData, xData="Cohort", yData="IgG1_Total.G2", fillParam="Cohort", 
             title="G2 - baseline", yLabel="% anti-H1 IgG1", nonparam=T) + scale_y_continuous(limits = c(0,80), breaks = seq(0,100,10)) 
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_G2_Galactosylation_bL.pdf", width=4.5)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG1_Total.G2")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4e-3.csv")




# ***************************    Sialylation *******************************

subsetData1 <- subset(mergedData, TimeCategory == "baseline" & Cohort == "Healthy")
  subsetData2 <- subset(mergedData, TimeCategory == "baseline" & Cohort == "anti-PD-1")
bivScatter(data1 = subsetData1, data2 = subsetData2,
           name1 = "Healthy", name2 = "anti-PD-1", xData = "IgG1_Total.Galactosylation..G1.G2.", yData = "IgG1_Total.sialylated", fillParam = "Cohort", 
           title = "Sialylation vs Galactosylation", xLabel = "Total galactosylation (% anti-H1 IgG)", yLabel = "Sialylation (% anti-H1 IgG)", nonparam = T) + 
  coord_cartesian(ylim = c(0,30), xlim = c(90,99))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_sial-vs-TotGalactosyl_correlation_twoCohorts.pdf", width=8, height=8)
temp <- rbind(subsetData1,subsetData2); 
# write.csv(x = temp[,c("dbCode", "Cohort","IgG1_Total.Galactosylation..G1.G2.","IgG1_Total.sialylated")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4c.csv")


# nonspecific sialylation
subsetData <- subset(mergedData, Cohort == "anti-PD-1" )
univScatter(data = subsetData, xData = "IgG1_Total.sialylated", yData="Nonsp_sialylated",fillParam = 'dummy', title = "Nonsp vs spec sialylation", 
            xLabel = "anti-H1 sialylation", yLabel = "Nonspecific sialylation") + scale_x_continuous(limits=c(5,30))+ scale_fill_manual(values=c("#FFB18C"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 62 rows containing non-finite values (stat_smooth).

## Warning: Removed 62 rows containing missing values (geom_point).

# specific sialylation
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort','Year'), measure.vars = c("IgG1_Total.sialylated"))
prePostTimeAveraged(melted, title = "Sialylation", xLabel = NULL, yLabel = "% anti-H1 IgG1") + scale_y_continuous(breaks=seq(0,99,2), limits = c(10,22))

  # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_sialylation_overTime.pdf", width=4.5, height=7)
# write.csv(melted, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2f.csv")

melted %>%  anova_test(value ~ TimeCategory+Cohort )                        # two-way anova without interaction term 
## Coefficient covariances computed by hccm()
## ANOVA Table (type II tests)
## 
##         Effect DFn DFd      F        p p<.05   ges
## 1 TimeCategory   2 152  0.349 7.06e-01       0.005
## 2       Cohort   1 152 21.531 7.46e-06     * 0.124
Anova(fit <- lm( value ~ Cohort * TimeCategory, data = melted)); tukey_hsd(fit)
## Anova Table (Type II tests)
## 
## Response: value
##                     Sum Sq  Df F value    Pr(>F)    
## Cohort               800.1   1 21.4416 7.846e-06 ***
## TimeCategory          25.9   2  0.3473    0.7072    
## Cohort:TimeCategory   51.1   2  0.6853    0.5055    
## Residuals           5597.4 150                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high   p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>   <dbl>
##  1 Coho~ Healt~ anti-~          0   -4.57    -6.51     -2.64  6.59e-6
##  2 Time~ basel~ oneWe~          0    0.281   -2.55      3.11  9.70e-1
##  3 Time~ basel~ late            0    0.962   -1.83      3.75  6.93e-1
##  4 Time~ oneWe~ late            0    0.681   -2.23      3.59  8.44e-1
##  5 Coho~ Healt~ anti-~          0   -3.74    -8.42      0.941 1.98e-1
##  6 Coho~ Healt~ Healt~          0    1.47    -3.33      6.27  9.50e-1
##  7 Coho~ Healt~ anti-~          0   -4.81    -9.94      0.319 7.97e-2
##  8 Coho~ Healt~ Healt~          0    1.05    -3.74      5.85  9.88e-1
##  9 Coho~ Healt~ anti-~          0   -2.78    -7.73      2.17  5.84e-1
## 10 Coho~ anti-~ Healt~          0    5.20     0.526     9.88  1.97e-2
## 11 Coho~ anti-~ anti-~          0   -1.07    -6.09      3.94  9.90e-1
## 12 Coho~ anti-~ Healt~          0    4.79     0.114     9.47  4.12e-2
## 13 Coho~ anti-~ anti-~          0    0.956   -3.87      5.79  9.93e-1
## 14 Coho~ Healt~ anti-~          0   -6.28   -11.4      -1.15  7.11e-3
## 15 Coho~ Healt~ Healt~          0   -0.412   -5.21      4.39  1.00e+0
## 16 Coho~ Healt~ anti-~          0   -4.25    -9.20      0.699 1.37e-1
## 17 Coho~ anti-~ Healt~          0    5.87     0.736    11.0   1.50e-2
## 18 Coho~ anti-~ anti-~          0    2.03    -3.24      7.30  8.76e-1
## 19 Coho~ Healt~ anti-~          0   -3.84    -8.78      1.11  2.26e-1
## # ... with 1 more variable: p.adj.signif <chr>
# write.csv(melted, file = "./sialylation_forTwoWayAnova.csv")

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBar(data=subsetData, xData="Cohort", yData="IgG1_Total.sialylated", fillParam="Cohort", 
             title="Sialylation\nbaseline", yLabel="% anti-H1 IgG1") + scale_y_continuous(limits = c(0,40), breaks = seq(0,50,5))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

  # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1sialylation_bL.pdf", width=5, height=8)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG1_Total.sialylated")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2g.csv")


subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBar(data=subsetData, xData="Cohort", yData="IgG1_Total.sialylated", fillParam="Cohort", 
             title="One week", yLabel="Sialylation (% anti-H1 IgG1)")+ scale_y_continuous(limits = c(0,40), breaks = seq(0,50,5))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "IgG1_Total.sialylated", fillParam = "Cohort", title = "IgG1 sialylation", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG1", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,10), limits = c(0,40) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

 # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG1_sialylation_overTime_perSubject.pdf", width=6)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG1_Total.sialylated")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4f-1.csv")
prePostTime(subsetData, xData = "TimeCategory", yData = "IgG2_Total.sialylated", fillParam = "Cohort", title = "IgG2 sialylation", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG2", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,10), limits = c(0,40) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

 # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG2_sialylation_overTime_perSubject.pdf", width=6)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG2_Total.sialylated")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4f-2.csv")
prePostTime(subsetData, xData = "TimeCategory", yData = "IgG34_Total.sialylated", fillParam = "Cohort", title = "IgG3/4 sialylation", xLabel = "TimeCategory",
            yLabel = "% anti-H1 IgG3/4", groupby = "Subject") + scale_y_continuous(breaks=seq(0,240,10), limits = c(0,40) )
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

 # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/IgG34_sialylation_overTime_perSubject.pdf", width=6)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","IgG34_Total.sialylated")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4f-3.csv")


summary(lm( FChai_late ~ Cohort + Year + IgG1sial_oW, data=subsetData))
## 
## Call:
## lm(formula = FChai_late ~ Cohort + Year + IgG1sial_oW, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.82890 -0.21526 -0.04529  0.18127  1.38534 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.78591    0.30882   2.545   0.0121 *  
## Cohortanti-PD-1  0.48945    0.07300   6.704 5.83e-10 ***
## Year            -0.05049    0.06115  -0.826   0.4105    
## IgG1sial_oW      0.42385    0.24118   1.757   0.0812 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4039 on 128 degrees of freedom
##   (24 observations deleted due to missingness)
## Multiple R-squared:  0.2638, Adjusted R-squared:  0.2466 
## F-statistic: 15.29 on 3 and 128 DF,  p-value: 1.459e-08
summary(lm( IgG1sial_oW ~ Cohort + FCtfh_oW, data=subsetData))
## 
## Call:
## lm(formula = IgG1sial_oW ~ Cohort + FCtfh_oW, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29779 -0.06854 -0.01883  0.05464  0.75694 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.108461   0.024834  44.635   <2e-16 ***
## Cohortanti-PD-1 -0.020589   0.029729  -0.693    0.490    
## FCtfh_oW        -0.005261   0.014278  -0.368    0.713    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1595 on 134 degrees of freedom
##   (19 observations deleted due to missingness)
## Multiple R-squared:  0.006815,   Adjusted R-squared:  -0.008009 
## F-statistic: 0.4597 on 2 and 134 DF,  p-value: 0.6324

———– Affinity ————————-

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"); subsetData$X1.Kd.HA <- 1 / subsetData$X1.Kd.HA
twoSampleBar(data=subsetData, xData="Cohort", yData="X1.Kd.HA", fillParam="Cohort", title="HA EC50/Kd\nBaseline", yLabel="1 / Concentration (ug/mL)",position = 'left') + 
  scale_y_continuous(breaks = seq(0,100,1)) + theme(plot.title = element_text(size=32)) +
  ylab(expression(paste("1 / H1 Concentration (",mu,"g/mL)"))) + ggtitle(expression(paste("E",C[50],"/",K[d], " baseline")))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Warning: Removed 31 rows containing missing values (position_quasirandom).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/AntibodyAffinity_baseLine.pdf", device="pdf", width=4.1, height=8)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","X1.Kd.HA")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4k.csv")


subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort','Year'), measure.vars = c("Lo.Hi.HA.affinity"))
prePostTimeAveraged(melted, title = "anti-HA Affinity", xLabel = NULL, yLabel = "Lo-Hi HA affinity") + scale_y_continuous(breaks=seq(0,99,0.03), limits = c(0.6,0.9))

 # ggsave (filename = "D:/Pembro-Fluvac/Analysis/Images/antiHA-affinity_overTime_LoHi.pdf", width=4.5, height=7)
melted %>%  anova_test(value ~ TimeCategory+Cohort )                        # two-way anova without interaction term 
## Warning: NA detected in rows: 9.
## Removing this rows before the analysis.
## Coefficient covariances computed by hccm()
## ANOVA Table (type II tests)
## 
##         Effect DFn DFd      F        p p<.05   ges
## 1 TimeCategory   2 151 15.257 9.22e-07     * 0.168
## 2       Cohort   1 151 68.632 5.93e-14     * 0.312
Anova(fit <- lm( value ~ Cohort * TimeCategory, data = melted)); tukey_hsd(fit)
## Anova Table (Type II tests)
## 
## Response: value
##                      Sum Sq  Df F value    Pr(>F)    
## Cohort              0.47436   1 68.2280 7.281e-14 ***
## TimeCategory        0.21091   2 15.1677 1.010e-06 ***
## Cohort:TimeCategory 0.00772   2  0.5554     0.575    
## Residuals           1.03593 149                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## # A tibble: 19 x 9
##    term  group1 group2 null.value estimate conf.low conf.high    p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>    <dbl>
##  1 Coho~ Healt~ anti-~          0  -0.115  -0.141    -0.0883  0.      
##  2 Time~ basel~ oneWe~          0   0.0452  0.00627   0.0841  1.83e- 2
##  3 Time~ basel~ late            0   0.0883  0.0503    0.126   4.92e- 7
##  4 Time~ oneWe~ late            0   0.0432  0.00325   0.0831  3.06e- 2
##  5 Coho~ Healt~ anti-~          0  -0.130  -0.193    -0.0658  4.24e- 7
##  6 Coho~ Healt~ Healt~          0   0.0314 -0.0341    0.0970  7.36e- 1
##  7 Coho~ Healt~ anti-~          0  -0.0694 -0.140     0.00158 5.93e- 2
##  8 Coho~ Healt~ Healt~          0   0.0734  0.00791   0.139   1.84e- 2
##  9 Coho~ Healt~ anti-~          0  -0.0263 -0.0938    0.0412  8.71e- 1
## 10 Coho~ anti-~ Healt~          0   0.161   0.0972    0.225   2.64e-10
## 11 Coho~ anti-~ anti-~          0   0.0602 -0.00931   0.130   1.31e- 1
## 12 Coho~ anti-~ Healt~          0   0.203   0.139     0.267   2.35e-14
## 13 Coho~ anti-~ anti-~          0   0.103   0.0374    0.169   1.76e- 4
## 14 Coho~ Healt~ anti-~          0  -0.101  -0.172    -0.0299  9.38e- 4
## 15 Coho~ Healt~ Healt~          0   0.0420 -0.0235    0.108   4.37e- 1
## 16 Coho~ Healt~ anti-~          0  -0.0577 -0.125     0.00980 1.40e- 1
## 17 Coho~ anti-~ Healt~          0   0.143   0.0719    0.214   5.48e- 7
## 18 Coho~ anti-~ anti-~          0   0.0432 -0.0297    0.116   5.28e- 1
## 19 Coho~ Healt~ anti-~          0  -0.0997 -0.167    -0.0322  5.03e- 4
## # ... with 1 more variable: p.adj.signif <chr>
# write.csv(melted, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4l.csv")

subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "Lo.Hi.HA.affinity", fillParam = "Cohort", title = "anti-HA Affinity", xLabel = "TimeCategory",
            yLabel = "Lo-Hi HA affinity (anti-H1)", groupby = "Subject") + scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0,1) )
## Warning: Removed 1 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

 # ggsave (filename = "D:/Pembro-Fluvac/Analysis/Images/antiHA-affinity_overTime_LoHi_perSubject.pdf", width=8)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","Lo.Hi.HA.affinity")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e4m.csv")

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"), xData="Cohort", yData="Lo.Hi.HA.affinity", fillParam="Cohort", 
             title="anti-HA affinity at baseline", yLabel="Lo Hi HA affinity (anti-H1)") + scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0,1.1) )
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek"), xData="Cohort", yData="Lo.Hi.HA.affinity", fillParam="Cohort", 
             title="anti-HA affinity at oneWeek", yLabel="Lo Hi HA affinity (anti-H1)") + scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0,1.1) )
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Warning: Removed 1 rows containing missing values (position_quasirandom).

twoSampleBar(data=subset(mergedData, Cohort != "nonPD1" & TimeCategory == "late"), xData="Cohort", yData="Lo.Hi.HA.affinity", fillParam="Cohort", 
             title="anti-HA affinity at late", yLabel="Lo Hi HA affinity (anti-H1)") + scale_y_continuous(breaks=seq(0,10,0.25), limits = c(0,1.1) )
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"

FC_Affinity <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("Lo.Hi.HA.affinity"))
FC_Affinity$FC <- FC_Affinity$`late`/FC_Affinity$`baseline`
data=FC_Affinity; xData="Cohort"; yData="FC"; fillParam="Cohort"; title="anti-HA Affinity"; yLabel="fold-change (late / baseline)";
justforttest <- data[, c(xData,yData)]
fit <- rstatix::t_test(justforttest, formula = as.formula(paste(colnames(justforttest)[2], "~", paste(colnames(justforttest)[1]), sep = "") ))
pValue <- fit$p
annotationInfo <- paste0("P = ", round(pValue, 2)); my_grob = grobTree(textGrob(annotationInfo, x=0.03,  y=0.93, hjust=0, gp=gpar(col="black", fontsize=28)))
ggplot(data=data, aes_string(x=xData, y=yData, fill=fillParam, width=0.6)) + scale_fill_manual(values = c("#7FAEDB", "#FFB18C")) + 
  geom_hline(yintercept=1, linetype="dashed", color = "black", size=0.5) + 
  geom_violin(draw_quantiles = 0.5) + ggbeeswarm::geom_quasirandom(size=7, pch=21, fill="black", color="white", alpha=0.35) + 
  ggtitle(title) + ylab(yLabel) +  theme_bw() +
  theme(axis.text = element_text(size=28,hjust = 0.5, color="black"), axis.title = element_text(size=28,hjust = 0.5), axis.title.x = element_blank(), 
        plot.title = element_text(size=32,hjust = 0.5), axis.text.x = element_text(angle=45, hjust=1,vjust=1)) + theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank())+ 
  annotation_custom(my_grob) + theme(legend.position = "none") + scale_y_continuous(breaks=seq(0,99,0.25), limits = c(0.75,1.75))
## Warning: Removed 6 rows containing non-finite values (stat_ydensity).
## Warning: Removed 6 rows containing missing values (position_quasirandom).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('dbCode', 'TimeCategory', 'Cohort'), measure.vars = c("X1.Kd.HA")); melted$value <- 1/melted$value
prePostTimeAveraged(melted, title = "HA EC50/Kd", xLabel = NULL, yLabel = "1 / Concentration (ug/mL)") + scale_y_continuous(breaks=seq(0,99,0.5), limits = c(0.5,3.5)) + 
  ylab(expression(paste("1 / H1 Concentration (",mu,"g/mL)"))) + ggtitle(expression(paste("E",C[50],"/",K[d])))

 # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/AntibodyAffinity_overTime.pdf", device="pdf", width=4.5, height=7)
summary(fit <- lm( data = mergedData, X1.Kd.HA ~ Cohort  + TimeCategory))
## 
## Call:
## lm(formula = X1.Kd.HA ~ Cohort + TimeCategory, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8864 -0.7640 -0.4373  0.2622  5.7613 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           0.6456     0.3647   1.770   0.0817 .
## Cohortanti-PD-1       0.5068     0.3984   1.272   0.2081  
## CohortnonPD1         -0.4517     0.8162  -0.553   0.5819  
## TimeCategoryoneWeek   1.1249     0.4932   2.281   0.0260 *
## TimeCategorylate      1.0483     0.4342   2.414   0.0187 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.54 on 62 degrees of freedom
##   (106 observations deleted due to missingness)
## Multiple R-squared:  0.1348, Adjusted R-squared:  0.07903 
## F-statistic: 2.416 on 4 and 62 DF,  p-value: 0.05812
summary(fit <- aov(formula = X1.Kd.HA ~ Cohort  + TimeCategory + Cohort:TimeCategory, data = mergedData)); tukey_hsd(fit) %>% print(n=42)
##                     Df Sum Sq Mean Sq F value Pr(>F)  
## Cohort               2   4.25   2.126   0.939 0.3969  
## TimeCategory         2  18.67   9.336   4.122 0.0212 *
## Cohort:TimeCategory  4  15.73   3.933   1.737 0.1543  
## Residuals           58 131.35   2.265                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 106 observations deleted due to missingness
## # A tibble: 42 x 9
##    term  group1 group2 null.value estimate conf.low conf.high  p.adj
##  * <chr> <chr>  <chr>       <dbl>    <dbl>    <dbl>     <dbl>  <dbl>
##  1 Coho~ Healt~ anti-~          0   0.357   -0.558      1.27  0.619 
##  2 Coho~ Healt~ nonPD1          0  -0.612   -2.52       1.30  0.724 
##  3 Coho~ anti-~ nonPD1          0  -0.968   -2.90       0.963 0.454 
##  4 Time~ basel~ oneWe~          0   1.09    -0.0485     2.22  0.0634
##  5 Time~ basel~ late            0   1.04     0.0219     2.06  0.0442
##  6 Time~ oneWe~ late            0  -0.0453  -1.22       1.13  0.995 
##  7 Coho~ Healt~ anti-~          0  -0.617   -2.52       1.29  0.98  
##  8 Coho~ Healt~ nonPD~          0  -0.595   -4.30       3.11  1     
##  9 Coho~ Healt~ Healt~          0   0.416   -1.61       2.44  0.999 
## 10 Coho~ Healt~ anti-~          0   1.55    -1.25       4.35  0.693 
## 11 Coho~ Healt~ nonPD~          0  -0.355   -5.40       4.69  1     
## 12 Coho~ Healt~ Healt~          0  -0.0118  -2.04       2.01  1     
## 13 Coho~ Healt~ anti-~          0   1.51    -0.517      3.53  0.304 
## 14 Coho~ Healt~ nonPD~          0  -0.377   -5.42       4.67  1     
## 15 Coho~ anti-~ nonPD~          0   0.0216  -3.64       3.69  1     
## 16 Coho~ anti-~ Healt~          0   1.03    -0.920      2.99  0.741 
## 17 Coho~ anti-~ anti-~          0   2.17    -0.583      4.91  0.236 
## 18 Coho~ anti-~ nonPD~          0   0.262   -4.76       5.28  1     
## 19 Coho~ anti-~ Healt~          0   0.605   -1.35       2.56  0.985 
## 20 Coho~ anti-~ anti-~          0   2.12     0.171      4.08  0.0233
## 21 Coho~ anti-~ nonPD~          0   0.240   -4.78       5.26  1     
## 22 Coho~ nonPD~ Healt~          0   1.01    -2.72       4.74  0.993 
## 23 Coho~ nonPD~ anti-~          0   2.14    -2.05       6.34  0.776 
## 24 Coho~ nonPD~ nonPD~          0   0.240   -5.70       6.18  1     
## 25 Coho~ nonPD~ Healt~          0   0.584   -3.14       4.31  1     
## 26 Coho~ nonPD~ anti-~          0   2.10    -1.62       5.83  0.671 
## 27 Coho~ nonPD~ nonPD~          0   0.219   -5.72       6.16  1     
## 28 Coho~ Healt~ anti-~          0   1.13    -1.70       3.96  0.931 
## 29 Coho~ Healt~ nonPD~          0  -0.771   -5.84       4.29  1     
## 30 Coho~ Healt~ Healt~          0  -0.428   -2.50       1.64  0.999 
## 31 Coho~ Healt~ anti-~          0   1.09    -0.976      3.16  0.744 
## 32 Coho~ Healt~ nonPD~          0  -0.793   -5.86       4.27  1     
## 33 Coho~ anti-~ nonPD~          0  -1.90    -7.32       3.52  0.967 
## 34 Coho~ anti-~ Healt~          0  -1.56    -4.39       1.27  0.697 
## 35 Coho~ anti-~ anti-~          0  -0.0412  -2.87       2.79  1     
## 36 Coho~ anti-~ nonPD~          0  -1.93    -7.35       3.50  0.965 
## 37 Coho~ nonPD~ Healt~          0   0.343   -4.72       5.41  1     
## 38 Coho~ nonPD~ anti-~          0   1.86    -3.20       6.93  0.957 
## 39 Coho~ nonPD~ nonPD~          0  -0.0218  -6.88       6.83  1     
## 40 Coho~ Healt~ anti-~          0   1.52    -0.548      3.59  0.321 
## 41 Coho~ Healt~ nonPD~          0  -0.365   -5.43       4.70  1     
## 42 Coho~ anti-~ nonPD~          0  -1.88    -6.95       3.18  0.954 
## # ... with 1 more variable: p.adj.signif <chr>
# write.csv(melted, file="D:/Pembro-Fluvac/Analysis/sourceDataExports/2h.csv")



subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
univScatter(data = subsetData, xData = "Cycle.of.Immunotherapy", yData = "Lo.Hi.HA.affinity", fillParam = "TimeCategory", 
            title = "Affinity over time", xLabel= "Cycle of Immunotherapy", yLabel = "Lo Hi Affinity")  + scale_fill_manual(values=c("#FFB18C"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 30 rows containing non-finite values (stat_smooth).
## Warning: Removed 30 rows containing missing values (geom_point).

———– irAE analysis ——————–

index <- demog[which(demog$irAE == "Y" | demog$irAE == "N"),"Subject"]
mergedData.irAE <- mergedData[which(mergedData$Subject %in% index),]  
subsetData <- subset(mergedData.irAE, TimeCategory == "baseline" & Cohort == "anti-PD-1")
subsetData$dateDiff <- round( as.numeric(difftime(subsetData$DateFirstIRAE,  subsetData$DateFirstFlowFile, units = "days")),digits = 0)
subsetData <- subsetData[-which(is.na(subsetData$dateDiff)), ]           # zero out the ones who did not have irAE
subsetData$Subject <- factor(subsetData$Subject, levels = subsetData$Subject[order(subsetData$dateDiff, decreasing = T)] )
ggplot(subsetData, aes(x=dateDiff, y=Subject)) + 
  geom_bar(stat="Identity", width=0.15, fill="#ff9a6a", size=0.01, color="black") + geom_point(aes(size=irAEgradeWorst)) + 
  xlab("Days until flu vaccine") + ylab("Participant") + labs(size = "irAE grade") + ggtitle("Time since first irAE") + scale_size(range=c(2,7),) + 
  theme_bw() + theme(axis.text.x = element_text(color = "black", size=24), axis.title = element_text(size=24), plot.title = element_text(size=32),
                     axis.text.y = element_blank(), legend.text = element_text(size=16), legend.title = element_text(size=16), 
                     ) + theme(panel.grid.minor = element_blank(), panel.grid.major=element_blank()) + geom_vline(xintercept = 0, size=0.5)

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_dateDiff_irAEtoVaccine.pdf")
# write.csv(subsetData[,c("dbCode","dateDiff","irAEgradeWorst")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e7a.csv")



subsetData <- mergedData[which(!is.na(mergedData$irAE) & mergedData$irAE != "" ), ]
twoSampleBar(data = subsetData, yData="FCtfh_oW", yLabel = "Fold-change at one week", title="Post-vax\nICOS+CD38+ cTfh", xData = "irAE",fillParam = "irAE", nonparam = F) + 
  theme(axis.title.x = element_text(size=28),  axis.text.x = element_text(hjust = 0.5, vjust=0.5, angle=0)) +
  scale_fill_manual(values = c("grey90", "#ff9a6a")) 
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 2 rows containing missing values (position_quasirandom).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_FCtfh.pdf", width=3.5)
# write.csv(subsetData[,c("dbCode","TimeCategory","irAE","FCtfh_oW")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/4c.csv")

subsetData <- mergedData[which(!is.na(mergedData$irAE) & mergedData$irAE != "" ), ]
subsetData %>% group_by(irAE) %>% get_summary_stats(FCtfh_oW)
## # A tibble: 2 x 14
##   irAE  variable     n   min   max median    q1    q3   iqr   mad  mean    sd
##   <chr> <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 N     FCtfh_oW     7 0.924  2.35   1.24  1.10  1.43  0.33 0.355  1.37 0.482
## 2 Y     FCtfh_oW    12 0.782  6.80   1.95  1.24  2.96  1.71 1.27   2.43 1.71 
## # ... with 2 more variables: se <dbl>, ci <dbl>
twoSampleBar(data = subsetData, yData="H1N1pdm09.HAI.titer", yLabel = "Baseline H1N1pdm09 titer", title="Baseline HAI", xData = "irAE",fillParam = "irAE")+ 
  theme(axis.title.x = element_text(size=28),  axis.text.x = element_text(hjust = 0.5, vjust=0.5, angle=0), plot.title = element_text(size=28)) + 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) 
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

twoSampleBar(data = subsetData, yData="FChai_late", yLabel = "FC HAI titer", title="Fold-change HAI", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a"))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 5 rows containing missing values (position_quasirandom).

twoSampleBar(data = subsetData, yData="IgDlo_CD71hi_ActBCells...FreqParent", yLabel = "Baseline ABC (% CD71)", title="ABC", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a"))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 5 rows containing missing values (position_quasirandom).

twoSampleBar(data = subsetData, yData="IgDlo_CD71hi_CD20loCD71hi...FreqParent", yLabel = "Baseline ASC (% CD71)", title="ASC", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a"))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 5 rows containing missing values (position_quasirandom).

twoSampleBar(data = subsetData, yData="cTfh_ICOShiCD38hi_cTfh..._medfi.ICOS.", yLabel = "medianFI ICOS", title="baseline\nICOS+CD38+ cTfh", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28),  axis.text.x = element_text(hjust = 0.5, vjust=0.5, angle=0))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 5 rows containing missing values (position_quasirandom).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_ICOS.pdf", width=4)
# write.csv(subsetData[,c("dbCode","TimeCategory","irAE","cTfh_ICOShiCD38hi_cTfh..._medfi.ICOS.")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e7d.csv")

twoSampleBar(data = subsetData, yData="cTfh_ICOShiCD38hi_cTfh..._medfi.aIgG4..batchEffect", yLabel = "medianFI aIgG4", title="baseline\nICOS+CD38+ cTfh", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a"))  + theme(axis.title.x = element_text(size=28),  axis.text.x = element_text(hjust = 0.5, vjust=0.5, angle=0)) # + geom_label_repel(aes(label = Subject),size=3)
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 5 rows containing missing values (position_quasirandom).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_aIgG4.pdf", width=4)
# write.csv(subsetData[,c("dbCode","TimeCategory","irAE","cTfh_ICOShiCD38hi_cTfh..._medfi.aIgG4..batchEffect")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e7e.csv")




subsetData <- mergedData[which(!is.na(mergedData$irAE) & mergedData$irAE != "" ), ]
twoSampleBar(data = subsetData, yData="IgG1_Total.sialylated", yLabel = "Sialylation (% anti-H1 IgG1)", title="Sialylation", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

twoSampleBar(data = subsetData, yData="IgG1sial_oW", yLabel = "Fold-change sialylation", title="Fold-change sialylation", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Removed 1 rows containing missing values (position_quasirandom).

twoSampleBar(data = subsetData, yData="IgG1_Total.Galactosylation..G1.G2.", yLabel = "Galactosylation (% anti-H1 IgG1)", title="Galactosylation", xData = "irAE",fillParam = "irAE")+ 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

twoSampleBar(data = subsetData, yData="Lo.Hi.HA.affinity", yLabel = "Lo Hi Affinity", title="Affinity", xData = "irAE",fillParam = "irAE") + 
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28))
## [1] "path 1:  ttest == T && batch == none && nonparam == F"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

subsetData <- mergedData[which(!is.na(mergedData$irAE) & mergedData$irAE != "" & mergedData$TimeCategory == "baseline" & mergedData$Cohort == "anti-PD-1"), ]
twoSampleBar(data = subsetData, yData="cTfh_ICOShiCD38hi_..FreqParent", yLabel = expression(paste("ICO",S^"+", "CD3",8^"+"," (% cTfh)")), 
             title="Baseline\nICOS+CD38+ cTfh", xData = "irAE", fillParam = "irAE", nonparam = T) + 
  theme(axis.title.x = element_text(size=28),  axis.text.x = element_text(hjust = 0.5, vjust=0.5, angle=0)) +
  scale_fill_manual(values = c("grey90", "#ff9a6a")) 
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_cTfh_Baseline.pdf", width=3.5)
# write.csv(subsetData[,c("dbCode","TimeCategory","irAE","cTfh_ICOShiCD38hi_..FreqParent")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e7j.csv")



subsetData <- subset(mergedData, Cohort != "nonPD1" )
twoSampleBar(data = subsetData, yData="ANA.titer", yLabel = "ANA titer", title="Anti-nuclear\nantibodies", xData = "Cohort",fillParam = "Cohort", nonparam = T)
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Warning: Removed 111 rows containing missing values (position_quasirandom).

  # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/ANA_baseline_fullGroup.pdf", width=4)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","ANA.titer")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e7k-1.csv")

twoSampleBar(data = subsetData, yData="Anti.dsDNA.titer", yLabel = "Anti-dsDNA titer", title="Anti-dsDNA\nantibodies", xData = "Cohort",fillParam = "Cohort", nonparam = T)
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Warning: Removed 111 rows containing missing values (position_quasirandom).

  # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/Anti-dsDNA_baseline_fullGroup.pdf", width=4)
# write.csv(subsetData[,c("dbCode","Cohort","TimeCategory","Anti.dsDNA.titer")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e7k-2.csv")

subsetData <- mergedData[which(!is.na(mergedData$irAE) & mergedData$irAE != "" ), ]
twoSampleBar(data = subsetData, yData="ANA.titer", yLabel = "ANA titer", title="Anti-nuclear\nantibodies", xData = "irAE",fillParam = "irAE", nonparam = T, position = "right") +
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28))
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_ANA_baseline.pdf", width=4)
# write.csv(subsetData[,c("dbCode","TimeCategory","irAE","ANA.titer")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e7l-1.csv")

twoSampleBar(data = subsetData, yData="Anti.dsDNA.titer", yLabel = "Anti-dsDNA titer", title="Anti-dsDNA\nantibodies", xData = "irAE",fillParam = "irAE", nonparam=T, position = "right") +
  scale_fill_manual(values = c("grey90", "#ff9a6a")) + theme(axis.title.x = element_text(size=28))
## [1] "path 2:  ttest == T && batch == none && nonparam == T"
## [1] "path 5:  ! is.nan(pValue) && confInt == F"
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

  # ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/irAE_anti-dsDNA_baseline.pdf", width=4)
# write.csv(subsetData[,c("dbCode","TimeCategory","irAE","Anti.dsDNA.titer")], file="D:/Pembro-Fluvac/Analysis/sourceDataExports/e7l-2.csv")